From 14e1459bf5a74d7a64069bef7cab0380efbd511f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 19 Feb 2025 15:06:22 +0100 Subject: [PATCH 01/16] Document settings --- crates/milli/src/vector/settings.rs | 205 ++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index f10407e42..4e9997028 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -20,58 +20,263 @@ pub struct EmbeddingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The source used to provide the embeddings. + /// + /// Which embedder parameters are available and mandatory is determined by the value of this setting. + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - Defaults to `openAi` pub source: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The name of the model to use. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `ollama` + /// + /// # Availability + /// + /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - For source `openAi`, defaults to `text-embedding-3-small` + /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` pub model: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The revision (commit SHA1) of the model to use. + /// + /// If unspecified, Meilisearch picks the latest revision of the model.
+ /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` + /// - Otherwise, defaults to `null` pub revision: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The pooling method to use. + /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - Defaults to `useModel` + /// + /// # Compatibility Note + /// + /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. pub pooling: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The API key to pass to the remote embedder while making requests. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`. + /// - For other sources, no bearer token is sent if this parameter is not set. + /// + /// # Note + /// + /// - This setting is partially hidden when returned by the settings pub api_key: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The expected dimensions of the embeddings produced by this embedder.
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `userProvided` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings + /// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the dimensions are the maximum allowed by the model. + /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Whether to binary quantize the embeddings of this embedder. + /// + /// Binary quantized embeddings are smaller than regular embeddings, which improves + /// disk usage and retrieval speed, at the cost of relevancy. + /// + /// # Availability + /// + /// - This parameter is available for all embedders + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When set to `true`, embeddings are not regenerated, but they are binary quantized, which takes time. + /// + /// # Defaults + /// + /// - Defaults to `false` + /// + /// # Note + /// + /// As binary quantization is a destructive operation, it is not possible to disable this setting again after + /// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you, + /// we recommend using this parameter on a test index first. pub binary_quantized: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// A liquid template used to render documents to a text that can be embedded. + /// + /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder.
+ /// The embedder then generates document vectors based on this text. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Rendered texts are truncated to this size. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text. + /// - 🌱 When decreased, embeddings are never regenerated + /// + /// # Defaults + /// + /// - Defaults to 400 pub document_template_max_bytes: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// URL to reach the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 When modified for source `openAi`, embeddings are never regenerated + /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Template request to send to the remote embedder.
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub request: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Template response indicating how to find the embeddings in the response from the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub response: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option>)] + /// Additional headers to send to the remote embedder. + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings pub headers: Setting>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Affine transformation applied to the semantic score to make it more comparable to the ranking score.
+ /// + /// # Availability + /// + /// - This parameter is available for all embedders + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings pub distribution: Setting, } From 526476e1688713dc21b5a55ac811fb311e3f1740 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 13:51:46 +0100 Subject: [PATCH 02/16] Move settings test to its own file --- crates/milli/src/update/settings.rs | 979 +---------------------- crates/milli/src/update/test_settings.rs | 962 ++++++++++++++++++++++ 2 files changed, 980 insertions(+), 961 deletions(-) create mode 100644 crates/milli/src/update/test_settings.rs diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 0d0648fc8..11682177f 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -1894,974 +1894,31 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + indexing_embedder, distribution, headers, binary_quantized: binary_quantize, })) } -#[cfg(test)] -mod tests { - use big_s::S; - use heed::types::Bytes; - use maplit::{btreemap, btreeset, hashset}; - use meili_snap::snapshot; - - use super::*; - use crate::error::Error; - use crate::index::tests::TempIndex; - use crate::update::ClearDocuments; - use crate::{db_snap, Criterion, Filter, SearchResult}; - - #[test] - fn set_and_reset_searchable_fields() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. - let mut wtxn = index.write_txn().unwrap(); - - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 1, "name": "kevin", "age": 23 }, - { "id": 2, "name": "kevina", "age": 21}, - { "id": 3, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - - // We change the searchable fields to be the "name" field only.
- index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_searchable_fields(vec!["name".into()]); +fn deserialize_sub_embedder( + sub_embedder: serde_json::Value, + embedder_name: &str, + context: NestingContext, +) -> std::result::Result { + match deserr::deserialize::<_, _, deserr::errors::JsonError>(sub_embedder) { + Ok(sub_embedder) => Ok(sub_embedder), + Err(error) => { + let message = format!("{error}{}", context.nesting_embedders()); + Err(UserError::InvalidSettingsEmbedder { + embedder_name: context.embedder_name_with_context(embedder_name), + message, }) - .unwrap(); - - wtxn.commit().unwrap(); - - db_snap!(index, fields_ids_map, @r###" - 0 id | - 1 name | - 2 age | - "###); - db_snap!(index, searchable_fields, @r###"["name"]"###); - db_snap!(index, fieldids_weights_map, @r###" - fid weight - 1 0 | - "###); - - // Check that the searchable field is correctly set to "name" only. - let rtxn = index.read_txn().unwrap(); - // When we search for something that is not in - // the searchable fields it must not return any document. - let result = index.search(&rtxn).query("23").execute().unwrap(); - assert_eq!(result.documents_ids, Vec::::new()); - - // When we search for something that is in the searchable fields - // we must find the appropriate document. - let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); - let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - let fid_map = index.fields_ids_map(&rtxn).unwrap(); - assert_eq!(documents.len(), 1); - assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); - drop(rtxn); - - // We change the searchable fields to be the "name" field only. 
- index - .update_settings(|settings| { - settings.reset_searchable_fields(); - }) - .unwrap(); - - db_snap!(index, fields_ids_map, @r###" - 0 id | - 1 name | - 2 age | - "###); - db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###); - db_snap!(index, fieldids_weights_map, @r###" - fid weight - 0 0 | - 1 0 | - 2 0 | - "###); - - // Check that the searchable field have been reset and documents are found now. - let rtxn = index.read_txn().unwrap(); - let fid_map = index.fields_ids_map(&rtxn).unwrap(); - let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); - // the searchable fields should contain all the fields - let searchable_fields = index.searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); - let result = index.search(&rtxn).query("23").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); - } - - #[test] - fn mixup_searchable_with_displayed_fields() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // First we send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - - // In the same transaction we change the displayed fields to be only the "age". - // We also change the searchable fields to be the "name" field only. 
- index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_displayed_fields(vec!["age".into()]); - settings.set_searchable_fields(vec!["name".into()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), (&["age"][..])); - drop(rtxn); - - // We change the searchable fields to be the "name" field only. - index - .update_settings(|settings| { - settings.reset_searchable_fields(); - }) - .unwrap(); - - // Check that the displayed fields always contains only the "age" field. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), &["age"][..]); - } - - #[test] - fn default_displayed_fields() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, None); - } - - #[test] - fn set_and_reset_displayed_field() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_displayed_fields(vec!["age".into()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Check that the displayed fields are correctly set to only the "age" field. 
- let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), &["age"][..]); - drop(rtxn); - - // We reset the fields ids to become `None`, the default value. - index - .update_settings(|settings| { - settings.reset_displayed_fields(); - }) - .unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, None); - } - - #[test] - fn set_filterable_fields() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age") }); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Check that the displayed fields are correctly set. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age") }); - // Only count the field_id 0 and level 0 facet values. - // TODO we must support typed CSVs for numbers to be understood. - let fidmap = index.fields_ids_map(&rtxn).unwrap(); - for document in index.all_documents(&rtxn).unwrap() { - let document = document.unwrap(); - let json = crate::obkv_to_json(&fidmap.ids().collect::>(), &fidmap, document.1) - .unwrap(); - println!("json: {:?}", json); } - let count = index - .facet_id_f64_docids - .remap_key_type::() - // The faceted field id is 2u16 - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 3); - drop(rtxn); - - // Index a little more documents with new and current facets values. 
- index - .add_documents(documents!([ - { "id": 3, "name": "kevin2", "age": 23}, - { "id": 4, "name": "kevina2", "age": 21 }, - { "id": 5, "name": "benoit", "age": 35 } - ])) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 0 and level 0 facet values. - let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 4); - - // Set the filterable fields to be the age and the name. - index - .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age"), S("name") }); - }) - .unwrap(); - - // Check that the displayed fields are correctly set. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age"), S("name") }); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 2 and level 0 facet values. - let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 4); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 1 and level 0 facet values. - let count = index - .facet_id_string_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 1]) - .unwrap() - .count(); - assert_eq!(count, 5); - - // Remove the age from the filterable fields. - index - .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("name") }); - }) - .unwrap(); - - // Check that the displayed fields are correctly set. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("name") }); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 2 and level 0 facet values. 
- let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 0); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 1 and level 0 facet values. - let count = index - .facet_id_string_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 1]) - .unwrap() - .count(); - assert_eq!(count, 5); - } - - #[test] - fn set_asc_desc_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - settings.set_displayed_fields(vec![S("name")]); - settings.set_criteria(vec![Criterion::Asc("age".to_owned())]); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - let documents = index.documents(&rtxn, documents_ids).unwrap(); - - // Fetch the documents "age" field in the ordre in which the documents appear. - let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap(); - let iter = documents.into_iter().map(|(_, doc)| { - let bytes = doc.get(age_field_id).unwrap(); - let string = std::str::from_utf8(bytes).unwrap(); - string.parse::().unwrap() - }); - - assert_eq!(iter.collect::>(), vec![21, 23, 34]); - } - - #[test] - fn set_distinct_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - // Don't display the generated `id` field. - settings.set_displayed_fields(vec![S("name"), S("age")]); - settings.set_distinct_field(S("age")); - }) - .unwrap(); - - // Then index some documents. 
- index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23 }, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 }, - { "id": 3, "name": "bernard", "age": 34 }, - { "id": 4, "name": "bertrand", "age": 34 }, - { "id": 5, "name": "bernie", "age": 34 }, - { "id": 6, "name": "ben", "age": 34 } - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - - // There must be at least one document with a 34 as the age. - assert_eq!(documents_ids.len(), 3); - } - - #[test] - fn set_nested_distinct_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - // Don't display the generated `id` field. - settings.set_displayed_fields(vec![S("person")]); - settings.set_distinct_field(S("person.age")); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "person": { "name": "kevin", "age": 23 }}, - { "id": 1, "person": { "name": "kevina", "age": 21 }}, - { "id": 2, "person": { "name": "benoit", "age": 34 }}, - { "id": 3, "person": { "name": "bernard", "age": 34 }}, - { "id": 4, "person": { "name": "bertrand", "age": 34 }}, - { "id": 5, "person": { "name": "bernie", "age": 34 }}, - { "id": 6, "person": { "name": "ben", "age": 34 }} - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - - // There must be at least one document with a 34 as the age. - assert_eq!(documents_ids.len(), 3); - } - - #[test] - fn default_stop_words() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. 
- index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Ensure there is no stop_words by default - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_none()); - } - - #[test] - fn set_and_reset_stop_words() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // First we send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs" }, - { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best" }, - { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good" }, - ]), - ) - .unwrap(); - - // In the same transaction we provide some stop_words - let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() }; - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_stop_words(set.clone()); - }) - .unwrap(); - - wtxn.commit().unwrap(); - - // Ensure stop_words are effectively stored - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_some()); // at this point the index should return something - - let stop_words = stop_words.unwrap(); - let expected = fst::Set::from_iter(&set).unwrap(); - assert_eq!(stop_words.as_fst().as_bytes(), expected.as_fst().as_bytes()); - - // when we search for something that is a non prefix stop_words it should be ignored - // thus we should get a placeholder search (all the results = 3) - let result = index.search(&rtxn).query("the ").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 3); - let result = index.search(&rtxn).query("i ").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 3); - let result = index.search(&rtxn).query("are ").execute().unwrap(); - 
assert_eq!(result.documents_ids.len(), 3); - - let result = index.search(&rtxn).query("dog").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos - let result = index.search(&rtxn).query("benoรฎt").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data - - // now we'll reset the stop_words and ensure it's None - index - .update_settings(|settings| { - settings.reset_stop_words(); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_none()); - - // now we can search for the stop words - let result = index.search(&rtxn).query("the").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - let result = index.search(&rtxn).query("i").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = index.search(&rtxn).query("are").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - - // the rest of the search is still not impacted - let result = index.search(&rtxn).query("dog").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos - let result = index.search(&rtxn).query("benoรฎt").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data - } - - #[test] - fn set_and_reset_synonyms() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Send 3 documents with ids from 1 to 3. 
- index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs"}, - { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best"}, - { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good"}, - ]), - ) - .unwrap(); - - // In the same transaction provide some synonyms - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_synonyms(btreemap! { - "blini".to_string() => vec!["crepes".to_string()], - "super like".to_string() => vec!["love".to_string()], - "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()] - }); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Ensure synonyms are effectively stored - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(!synonyms.is_empty()); // at this point the index should return something - - // Check that we can use synonyms - let result = index.search(&rtxn).query("blini").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = index.search(&rtxn).query("super like").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = index.search(&rtxn).query("puppies").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - - // Reset the synonyms - index - .update_settings(|settings| { - settings.reset_synonyms(); - }) - .unwrap(); - - // Ensure synonyms are reset - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(synonyms.is_empty()); - - // Check that synonyms are no longer work - let result = index.search(&rtxn).query("blini").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - let result = index.search(&rtxn).query("super like").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - let result = index.search(&rtxn).query("puppies").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - } - - #[test] - fn 
thai_synonyms() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "เธขเธตเนˆเธ›เธธเนˆเธ™" }, - { "id": 1, "name": "เธเธตเนˆเธ›เธธเนˆเธ™" }, - ]), - ) - .unwrap(); - - // In the same transaction provide some synonyms - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_synonyms(btreemap! { - "japanese".to_string() => vec![S("เธเธตเนˆเธ›เธธเนˆเธ™"), S("เธขเธตเนˆเธ›เธธเนˆเธ™")], - }); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Ensure synonyms are effectively stored - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(!synonyms.is_empty()); // at this point the index should return something - - // Check that we can use synonyms - let result = index.search(&rtxn).query("japanese").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - } - - #[test] - fn setting_searchable_recomputes_other_settings() { - let index = TempIndex::new(); - - // Set all the settings except searchable - index - .update_settings(|settings| { - settings.set_displayed_fields(vec!["hello".to_string()]); - settings.set_filterable_fields(hashset! 
{ S("age"), S("toto") }); - settings.set_criteria(vec![Criterion::Asc(S("toto"))]); - }) - .unwrap(); - - // check the output - let rtxn = index.read_txn().unwrap(); - assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); - // since no documents have been pushed the primary key is still unset - assert!(index.primary_key(&rtxn).unwrap().is_none()); - assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); - drop(rtxn); - - // We set toto and age as searchable to force reordering of the fields - index - .update_settings(|settings| { - settings.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); - assert!(index.primary_key(&rtxn).unwrap().is_none()); - assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); - } - - #[test] - fn setting_not_filterable_cant_filter() { - let index = TempIndex::new(); - - // Set all the settings except searchable - index - .update_settings(|settings| { - settings.set_displayed_fields(vec!["hello".to_string()]); - // It is only Asc(toto), there is a facet database but it is denied to filter with toto. - settings.set_criteria(vec![Criterion::Asc(S("toto"))]); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - let filter = Filter::from_str("toto = 32").unwrap().unwrap(); - let _ = filter.evaluate(&rtxn, &index).unwrap_err(); - } - - #[test] - fn setting_primary_key() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Set the primary key settings - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_primary_key(S("mykey")); - }) - .unwrap(); - wtxn.commit().unwrap(); - let mut wtxn = index.write_txn().unwrap(); - assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); - - // Then index some documents with the "mykey" primary key. 
- index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "mykey": 1, "name": "kevin", "age": 23 }, - { "mykey": 2, "name": "kevina", "age": 21 }, - { "mykey": 3, "name": "benoit", "age": 34 }, - { "mykey": 4, "name": "bernard", "age": 34 }, - { "mykey": 5, "name": "bertrand", "age": 34 }, - { "mykey": 6, "name": "bernie", "age": 34 }, - { "mykey": 7, "name": "ben", "age": 34 } - ]), - ) - .unwrap(); - wtxn.commit().unwrap(); - - // Updating settings with the same primary key should do nothing - let mut wtxn = index.write_txn().unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_primary_key(S("mykey")); - }) - .unwrap(); - assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); - wtxn.commit().unwrap(); - - // Updating the settings with a different (or no) primary key causes an error - let mut wtxn = index.write_txn().unwrap(); - let error = index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.reset_primary_key(); - }) - .unwrap_err(); - assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_)))); - wtxn.abort(); - - // But if we clear the database... - let mut wtxn = index.write_txn().unwrap(); - let builder = ClearDocuments::new(&mut wtxn, &index); - builder.execute().unwrap(); - wtxn.commit().unwrap(); - - // ...we can change the primary key - index - .update_settings(|settings| { - settings.set_primary_key(S("myid")); - }) - .unwrap(); - } - - #[test] - fn setting_impact_relevancy() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("genres") }); - }) - .unwrap(); - - index.add_documents(documents!([ - { - "id": 11, - "title": "Star Wars", - "overview": - "Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. 
Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.", - "genres": ["Adventure", "Action", "Science Fiction"], - "poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg", - "release_date": 233366400 - }, - { - "id": 30, - "title": "Magnetic Rose", - "overview": "", - "genres": ["Animation", "Science Fiction"], - "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", - "release_date": 819676800 - } - ])).unwrap(); - - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap(); - let first_id = documents_ids[0]; - let documents = index.documents(&rtxn, documents_ids).unwrap(); - let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap(); - - let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap(); - let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); - assert_eq!(line, r#""Star Wars""#); - } - - #[test] - fn test_disable_typo() { - let index = TempIndex::new(); - - let mut txn = index.write_txn().unwrap(); - assert!(index.authorize_typos(&txn).unwrap()); - - index - .update_settings_using_wtxn(&mut txn, |settings| { - settings.set_autorize_typos(false); - }) - .unwrap(); - - assert!(!index.authorize_typos(&txn).unwrap()); - } - - #[test] - fn update_min_word_len_for_typo() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_min_word_len_one_typo(8); - settings.set_min_word_len_two_typos(8); - }) - .unwrap(); - - let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8); - assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8); - - index - .update_settings(|settings| { - settings.reset_min_word_len_one_typo(); - settings.reset_min_word_len_two_typos(); - }) - 
.unwrap(); - - let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO); - assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS); - } - - #[test] - fn update_invalid_min_word_len_for_typo() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_min_word_len_one_typo(10); - settings.set_min_word_len_two_typos(7); - }) - .unwrap_err(); - } - - #[test] - fn update_exact_words_normalization() { - let index = TempIndex::new(); - - let mut txn = index.write_txn().unwrap(); - // Set the genres setting - index - .update_settings_using_wtxn(&mut txn, |settings| { - let words = btreeset! { S("Ab"), S("ac") }; - settings.set_exact_words(words); - }) - .unwrap(); - - let exact_words = index.exact_words(&txn).unwrap().unwrap(); - for word in exact_words.into_fst().stream().into_str_vec().unwrap() { - assert!(word.0 == "ac" || word.0 == "ab"); - } - } - - #[test] - fn test_correct_settings_init() { - let index = TempIndex::new(); - - index - .update_settings(|settings| { - // we don't actually update the settings, just check their content - let Settings { - wtxn: _, - index: _, - indexer_config: _, - searchable_fields, - displayed_fields, - filterable_fields, - sortable_fields, - criteria, - stop_words, - non_separator_tokens, - separator_tokens, - dictionary, - distinct_field, - synonyms, - primary_key, - authorize_typos, - min_word_len_two_typos, - min_word_len_one_typo, - exact_words, - exact_attributes, - max_values_per_facet, - sort_facet_values_by, - pagination_max_total_hits, - proximity_precision, - embedder_settings, - search_cutoff, - localized_attributes_rules, - prefix_search, - facet_search, - } = settings; - assert!(matches!(searchable_fields, Setting::NotSet)); - assert!(matches!(displayed_fields, Setting::NotSet)); - assert!(matches!(filterable_fields, Setting::NotSet)); - 
assert!(matches!(sortable_fields, Setting::NotSet)); - assert!(matches!(criteria, Setting::NotSet)); - assert!(matches!(stop_words, Setting::NotSet)); - assert!(matches!(non_separator_tokens, Setting::NotSet)); - assert!(matches!(separator_tokens, Setting::NotSet)); - assert!(matches!(dictionary, Setting::NotSet)); - assert!(matches!(distinct_field, Setting::NotSet)); - assert!(matches!(synonyms, Setting::NotSet)); - assert!(matches!(primary_key, Setting::NotSet)); - assert!(matches!(authorize_typos, Setting::NotSet)); - assert!(matches!(min_word_len_two_typos, Setting::NotSet)); - assert!(matches!(min_word_len_one_typo, Setting::NotSet)); - assert!(matches!(exact_words, Setting::NotSet)); - assert!(matches!(exact_attributes, Setting::NotSet)); - assert!(matches!(max_values_per_facet, Setting::NotSet)); - assert!(matches!(sort_facet_values_by, Setting::NotSet)); - assert!(matches!(pagination_max_total_hits, Setting::NotSet)); - assert!(matches!(proximity_precision, Setting::NotSet)); - assert!(matches!(embedder_settings, Setting::NotSet)); - assert!(matches!(search_cutoff, Setting::NotSet)); - assert!(matches!(localized_attributes_rules, Setting::NotSet)); - assert!(matches!(prefix_search, Setting::NotSet)); - assert!(matches!(facet_search, Setting::NotSet)); - }) - .unwrap(); - } - - #[test] - fn settings_must_ignore_soft_deleted() { - use serde_json::json; - - let index = TempIndex::new(); - - let mut docs = vec![]; - for i in 0..10 { - docs.push(json!({ "id": i, "title": format!("{:x}", i) })); - } - index.add_documents(documents! 
{ docs }).unwrap(); - - index.delete_documents((0..5).map(|id| id.to_string()).collect()); - - let mut wtxn = index.write_txn().unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_searchable_fields(vec!["id".to_string()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.write_txn().unwrap(); - let docs: StdResult, _> = index.all_documents(&rtxn).unwrap().collect(); - let docs = docs.unwrap(); - assert_eq!(docs.len(), 5); } } + +#[cfg(test)] +#[path = "test_settings.rs"] +mod tests; diff --git a/crates/milli/src/update/test_settings.rs b/crates/milli/src/update/test_settings.rs new file mode 100644 index 000000000..1b5992462 --- /dev/null +++ b/crates/milli/src/update/test_settings.rs @@ -0,0 +1,962 @@ +use big_s::S; +use heed::types::Bytes; +use maplit::{btreemap, btreeset, hashset}; +use meili_snap::snapshot; + +use super::*; +use crate::error::Error; +use crate::index::tests::TempIndex; +use crate::update::ClearDocuments; +use crate::{db_snap, Criterion, Filter, SearchResult}; + +#[test] +fn set_and_reset_searchable_fields() { + let index = TempIndex::new(); + + // First we send 3 documents with ids from 1 to 3. + let mut wtxn = index.write_txn().unwrap(); + + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 1, "name": "kevin", "age": 23 }, + { "id": 2, "name": "kevina", "age": 21}, + { "id": 3, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + + // We change the searchable fields to be the "name" field only. + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_searchable_fields(vec!["name".into()]); + }) + .unwrap(); + + wtxn.commit().unwrap(); + + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["name"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 1 0 | + "###); + + // Check that the searchable field is correctly set to "name" only. 
+ let rtxn = index.read_txn().unwrap(); + // When we search for something that is not in + // the searchable fields it must not return any document. + let result = index.search(&rtxn).query("23").execute().unwrap(); + assert_eq!(result.documents_ids, Vec::::new()); + + // When we search for something that is in the searchable fields + // we must find the appropriate document. + let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); + let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + assert_eq!(documents.len(), 1); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); + drop(rtxn); + + // We change the searchable fields to be the "name" field only. + index + .update_settings(|settings| { + settings.reset_searchable_fields(); + }) + .unwrap(); + + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 0 | + 2 0 | + "###); + + // Check that the searchable field have been reset and documents are found now. 
+ let rtxn = index.read_txn().unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); + // the searchable fields should contain all the fields + let searchable_fields = index.searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); + let result = index.search(&rtxn).query("23").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); +} + +#[test] +fn mixup_searchable_with_displayed_fields() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // First we send 3 documents with ids from 1 to 3. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + + // In the same transaction we change the displayed fields to be only the "age". + // We also change the searchable fields to be the "name" field only. + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_displayed_fields(vec!["age".into()]); + settings.set_searchable_fields(vec!["name".into()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Check that the displayed fields are correctly set to `None` (default value). + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), (&["age"][..])); + drop(rtxn); + + // We change the searchable fields to be the "name" field only. 
+ index + .update_settings(|settings| { + settings.reset_searchable_fields(); + }) + .unwrap(); + + // Check that the displayed fields always contains only the "age" field. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), &["age"][..]); +} + +#[test] +fn default_displayed_fields() { + let index = TempIndex::new(); + + // First we send 3 documents with ids from 1 to 3. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Check that the displayed fields are correctly set to `None` (default value). + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, None); +} + +#[test] +fn set_and_reset_displayed_field() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_displayed_fields(vec!["age".into()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Check that the displayed fields are correctly set to only the "age" field. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), &["age"][..]); + drop(rtxn); + + // We reset the fields ids to become `None`, the default value. + index + .update_settings(|settings| { + settings.reset_displayed_fields(); + }) + .unwrap(); + + // Check that the displayed fields are correctly set to `None` (default value). 
+ let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, None); +} + +#[test] +fn set_filterable_fields() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("age") }); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Check that the displayed fields are correctly set. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.filterable_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, hashset! { S("age") }); + // Only count the field_id 0 and level 0 facet values. + // TODO we must support typed CSVs for numbers to be understood. + let fidmap = index.fields_ids_map(&rtxn).unwrap(); + for document in index.all_documents(&rtxn).unwrap() { + let document = document.unwrap(); + let json = + crate::obkv_to_json(&fidmap.ids().collect::>(), &fidmap, document.1).unwrap(); + println!("json: {:?}", json); + } + let count = index + .facet_id_f64_docids + .remap_key_type::() + // The faceted field id is 2u16 + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 3); + drop(rtxn); + + // Index a little more documents with new and current facets values. + index + .add_documents(documents!([ + { "id": 3, "name": "kevin2", "age": 23}, + { "id": 4, "name": "kevina2", "age": 21 }, + { "id": 5, "name": "benoit", "age": 35 } + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 0 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 4); + + // Set the filterable fields to be the age and the name. 
+ index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("age"), S("name") }); + }) + .unwrap(); + + // Check that the displayed fields are correctly set. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.filterable_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, hashset! { S("age"), S("name") }); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 2 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 4); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 1 and level 0 facet values. + let count = index + .facet_id_string_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 1]) + .unwrap() + .count(); + assert_eq!(count, 5); + + // Remove the age from the filterable fields. + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("name") }); + }) + .unwrap(); + + // Check that the displayed fields are correctly set. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.filterable_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, hashset! { S("name") }); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 2 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 0); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 1 and level 0 facet values. + let count = index + .facet_id_string_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 1]) + .unwrap() + .count(); + assert_eq!(count, 5); +} + +#[test] +fn set_asc_desc_field() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. 
+ index + .update_settings(|settings| { + settings.set_displayed_fields(vec![S("name")]); + settings.set_criteria(vec![Criterion::Asc("age".to_owned())]); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + let documents = index.documents(&rtxn, documents_ids).unwrap(); + + // Fetch the documents "age" field in the ordre in which the documents appear. + let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap(); + let iter = documents.into_iter().map(|(_, doc)| { + let bytes = doc.get(age_field_id).unwrap(); + let string = std::str::from_utf8(bytes).unwrap(); + string.parse::().unwrap() + }); + + assert_eq!(iter.collect::>(), vec![21, 23, 34]); +} + +#[test] +fn set_distinct_field() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. + index + .update_settings(|settings| { + // Don't display the generated `id` field. + settings.set_displayed_fields(vec![S("name"), S("age")]); + settings.set_distinct_field(S("age")); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23 }, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 }, + { "id": 3, "name": "bernard", "age": 34 }, + { "id": 4, "name": "bertrand", "age": 34 }, + { "id": 5, "name": "bernie", "age": 34 }, + { "id": 6, "name": "ben", "age": 34 } + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. 
} = index.search(&rtxn).execute().unwrap(); + + // There must be at least one document with a 34 as the age. + assert_eq!(documents_ids.len(), 3); +} + +#[test] +fn set_nested_distinct_field() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. + index + .update_settings(|settings| { + // Don't display the generated `id` field. + settings.set_displayed_fields(vec![S("person")]); + settings.set_distinct_field(S("person.age")); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "person": { "name": "kevin", "age": 23 }}, + { "id": 1, "person": { "name": "kevina", "age": 21 }}, + { "id": 2, "person": { "name": "benoit", "age": 34 }}, + { "id": 3, "person": { "name": "bernard", "age": 34 }}, + { "id": 4, "person": { "name": "bertrand", "age": 34 }}, + { "id": 5, "person": { "name": "bernie", "age": 34 }}, + { "id": 6, "person": { "name": "ben", "age": 34 }} + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + + // There must be at least one document with a 34 as the age. + assert_eq!(documents_ids.len(), 3); +} + +#[test] +fn default_stop_words() { + let index = TempIndex::new(); + + // First we send 3 documents with ids from 1 to 3. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Ensure there is no stop_words by default + let rtxn = index.read_txn().unwrap(); + let stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_none()); +} + +#[test] +fn set_and_reset_stop_words() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // First we send 3 documents with ids from 1 to 3. 
+ index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs" }, + { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best" }, + { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good" }, + ]), + ) + .unwrap(); + + // In the same transaction we provide some stop_words + let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() }; + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_stop_words(set.clone()); + }) + .unwrap(); + + wtxn.commit().unwrap(); + + // Ensure stop_words are effectively stored + let rtxn = index.read_txn().unwrap(); + let stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_some()); // at this point the index should return something + + let stop_words = stop_words.unwrap(); + let expected = fst::Set::from_iter(&set).unwrap(); + assert_eq!(stop_words.as_fst().as_bytes(), expected.as_fst().as_bytes()); + + // when we search for something that is a non prefix stop_words it should be ignored + // thus we should get a placeholder search (all the results = 3) + let result = index.search(&rtxn).query("the ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + let result = index.search(&rtxn).query("i ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + let result = index.search(&rtxn).query("are ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + + let result = index.search(&rtxn).query("dog").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos + let result = index.search(&rtxn).query("benoรฎt").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data + + // now we'll reset the stop_words and ensure it's None + index + .update_settings(|settings| { + settings.reset_stop_words(); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let 
stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_none()); + + // now we can search for the stop words + let result = index.search(&rtxn).query("the").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + let result = index.search(&rtxn).query("i").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("are").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + + // the rest of the search is still not impacted + let result = index.search(&rtxn).query("dog").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos + let result = index.search(&rtxn).query("benoรฎt").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data +} + +#[test] +fn set_and_reset_synonyms() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Send 3 documents with ids from 1 to 3. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs"}, + { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best"}, + { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good"}, + ]), + ) + .unwrap(); + + // In the same transaction provide some synonyms + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_synonyms(btreemap! 
{ + "blini".to_string() => vec!["crepes".to_string()], + "super like".to_string() => vec!["love".to_string()], + "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()] + }); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Ensure synonyms are effectively stored + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(!synonyms.is_empty()); // at this point the index should return something + + // Check that we can use synonyms + let result = index.search(&rtxn).query("blini").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("super like").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("puppies").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + + // Reset the synonyms + index + .update_settings(|settings| { + settings.reset_synonyms(); + }) + .unwrap(); + + // Ensure synonyms are reset + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(synonyms.is_empty()); + + // Check that synonyms are no longer work + let result = index.search(&rtxn).query("blini").execute().unwrap(); + assert!(result.documents_ids.is_empty()); + let result = index.search(&rtxn).query("super like").execute().unwrap(); + assert!(result.documents_ids.is_empty()); + let result = index.search(&rtxn).query("puppies").execute().unwrap(); + assert!(result.documents_ids.is_empty()); +} + +#[test] +fn thai_synonyms() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Send 3 documents with ids from 1 to 3. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "เธขเธตเนˆเธ›เธธเนˆเธ™" }, + { "id": 1, "name": "เธเธตเนˆเธ›เธธเนˆเธ™" }, + ]), + ) + .unwrap(); + + // In the same transaction provide some synonyms + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_synonyms(btreemap! 
{ + "japanese".to_string() => vec![S("เธเธตเนˆเธ›เธธเนˆเธ™"), S("เธขเธตเนˆเธ›เธธเนˆเธ™")], + }); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Ensure synonyms are effectively stored + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(!synonyms.is_empty()); // at this point the index should return something + + // Check that we can use synonyms + let result = index.search(&rtxn).query("japanese").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); +} + +#[test] +fn setting_searchable_recomputes_other_settings() { + let index = TempIndex::new(); + + // Set all the settings except searchable + index + .update_settings(|settings| { + settings.set_displayed_fields(vec!["hello".to_string()]); + settings.set_filterable_fields(hashset! { S("age"), S("toto") }); + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); + }) + .unwrap(); + + // check the output + let rtxn = index.read_txn().unwrap(); + assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); + // since no documents have been pushed the primary key is still unset + assert!(index.primary_key(&rtxn).unwrap().is_none()); + assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); + drop(rtxn); + + // We set toto and age as searchable to force reordering of the fields + index + .update_settings(|settings| { + settings.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); + assert!(index.primary_key(&rtxn).unwrap().is_none()); + assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); +} + +#[test] +fn setting_not_filterable_cant_filter() { + let index = TempIndex::new(); + + // Set all the settings except searchable + index + .update_settings(|settings| { + settings.set_displayed_fields(vec!["hello".to_string()]); + // It 
is only Asc(toto), there is a facet database but it is denied to filter with toto. + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let filter = Filter::from_str("toto = 32").unwrap().unwrap(); + let _ = filter.evaluate(&rtxn, &index).unwrap_err(); +} + +#[test] +fn setting_primary_key() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Set the primary key settings + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_primary_key(S("mykey")); + }) + .unwrap(); + wtxn.commit().unwrap(); + let mut wtxn = index.write_txn().unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + + // Then index some documents with the "mykey" primary key. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "mykey": 1, "name": "kevin", "age": 23 }, + { "mykey": 2, "name": "kevina", "age": 21 }, + { "mykey": 3, "name": "benoit", "age": 34 }, + { "mykey": 4, "name": "bernard", "age": 34 }, + { "mykey": 5, "name": "bertrand", "age": 34 }, + { "mykey": 6, "name": "bernie", "age": 34 }, + { "mykey": 7, "name": "ben", "age": 34 } + ]), + ) + .unwrap(); + wtxn.commit().unwrap(); + + // Updating settings with the same primary key should do nothing + let mut wtxn = index.write_txn().unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_primary_key(S("mykey")); + }) + .unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + wtxn.commit().unwrap(); + + // Updating the settings with a different (or no) primary key causes an error + let mut wtxn = index.write_txn().unwrap(); + let error = index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.reset_primary_key(); + }) + .unwrap_err(); + assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_)))); + wtxn.abort(); + + // But if we clear the database... 
+ let mut wtxn = index.write_txn().unwrap(); + let builder = ClearDocuments::new(&mut wtxn, &index); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + // ...we can change the primary key + index + .update_settings(|settings| { + settings.set_primary_key(S("myid")); + }) + .unwrap(); +} + +#[test] +fn setting_impact_relevancy() { + let index = TempIndex::new(); + + // Set the genres setting + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("genres") }); + }) + .unwrap(); + + index.add_documents(documents!([ + { + "id": 11, + "title": "Star Wars", + "overview": + "Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.", + "genres": ["Adventure", "Action", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg", + "release_date": 233366400 + }, + { + "id": 30, + "title": "Magnetic Rose", + "overview": "", + "genres": ["Animation", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", + "release_date": 819676800 + } + ])).unwrap(); + + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. 
} = index.search(&rtxn).query("S").execute().unwrap(); + let first_id = documents_ids[0]; + let documents = index.documents(&rtxn, documents_ids).unwrap(); + let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap(); + + let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap(); + let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); + assert_eq!(line, r#""Star Wars""#); +} + +#[test] +fn test_disable_typo() { + let index = TempIndex::new(); + + let mut txn = index.write_txn().unwrap(); + assert!(index.authorize_typos(&txn).unwrap()); + + index + .update_settings_using_wtxn(&mut txn, |settings| { + settings.set_autorize_typos(false); + }) + .unwrap(); + + assert!(!index.authorize_typos(&txn).unwrap()); +} + +#[test] +fn update_min_word_len_for_typo() { + let index = TempIndex::new(); + + // Set the minimum word lengths for one and two typos + index + .update_settings(|settings| { + settings.set_min_word_len_one_typo(8); + settings.set_min_word_len_two_typos(8); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8); + + index + .update_settings(|settings| { + settings.reset_min_word_len_one_typo(); + settings.reset_min_word_len_two_typos(); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS); +} + +#[test] +fn update_invalid_min_word_len_for_typo() { + let index = TempIndex::new(); + + // Set an invalid pair of minimum word lengths (one typo: 10, two typos: 7); the update must fail + index + .update_settings(|settings| { + settings.set_min_word_len_one_typo(10); + settings.set_min_word_len_two_typos(7); + }) + .unwrap_err(); +} + +#[test] +fn update_exact_words_normalization() { + let index = TempIndex::new(); + + let mut txn = index.write_txn().unwrap(); + // Set the exact words setting + index + 
.update_settings_using_wtxn(&mut txn, |settings| { + let words = btreeset! { S("Ab"), S("ac") }; + settings.set_exact_words(words); + }) + .unwrap(); + + let exact_words = index.exact_words(&txn).unwrap().unwrap(); + for word in exact_words.into_fst().stream().into_str_vec().unwrap() { + assert!(word.0 == "ac" || word.0 == "ab"); + } +} + +#[test] +fn test_correct_settings_init() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + // we don't actually update the settings, just check their content + let Settings { + wtxn: _, + index: _, + indexer_config: _, + searchable_fields, + displayed_fields, + filterable_fields, + sortable_fields, + criteria, + stop_words, + non_separator_tokens, + separator_tokens, + dictionary, + distinct_field, + synonyms, + primary_key, + authorize_typos, + min_word_len_two_typos, + min_word_len_one_typo, + exact_words, + exact_attributes, + max_values_per_facet, + sort_facet_values_by, + pagination_max_total_hits, + proximity_precision, + embedder_settings, + search_cutoff, + localized_attributes_rules, + prefix_search, + facet_search, + } = settings; + assert!(matches!(searchable_fields, Setting::NotSet)); + assert!(matches!(displayed_fields, Setting::NotSet)); + assert!(matches!(filterable_fields, Setting::NotSet)); + assert!(matches!(sortable_fields, Setting::NotSet)); + assert!(matches!(criteria, Setting::NotSet)); + assert!(matches!(stop_words, Setting::NotSet)); + assert!(matches!(non_separator_tokens, Setting::NotSet)); + assert!(matches!(separator_tokens, Setting::NotSet)); + assert!(matches!(dictionary, Setting::NotSet)); + assert!(matches!(distinct_field, Setting::NotSet)); + assert!(matches!(synonyms, Setting::NotSet)); + assert!(matches!(primary_key, Setting::NotSet)); + assert!(matches!(authorize_typos, Setting::NotSet)); + assert!(matches!(min_word_len_two_typos, Setting::NotSet)); + assert!(matches!(min_word_len_one_typo, Setting::NotSet)); + assert!(matches!(exact_words, Setting::NotSet)); + 
assert!(matches!(exact_attributes, Setting::NotSet)); + assert!(matches!(max_values_per_facet, Setting::NotSet)); + assert!(matches!(sort_facet_values_by, Setting::NotSet)); + assert!(matches!(pagination_max_total_hits, Setting::NotSet)); + assert!(matches!(proximity_precision, Setting::NotSet)); + assert!(matches!(embedder_settings, Setting::NotSet)); + assert!(matches!(search_cutoff, Setting::NotSet)); + assert!(matches!(localized_attributes_rules, Setting::NotSet)); + assert!(matches!(prefix_search, Setting::NotSet)); + assert!(matches!(facet_search, Setting::NotSet)); + }) + .unwrap(); +} + +#[test] +fn settings_must_ignore_soft_deleted() { + use serde_json::json; + + let index = TempIndex::new(); + + let mut docs = vec![]; + for i in 0..10 { + docs.push(json!({ "id": i, "title": format!("{:x}", i) })); + } + index.add_documents(documents! { docs }).unwrap(); + + index.delete_documents((0..5).map(|id| id.to_string()).collect()); + + let mut wtxn = index.write_txn().unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_searchable_fields(vec!["id".to_string()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.write_txn().unwrap(); + let docs: StdResult, _> = index.all_documents(&rtxn).unwrap().collect(); + let docs = docs.unwrap(); + assert_eq!(docs.len(), 5); +} From 8d2d9066ba9b91000cee669fccae8562693ffb51 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 20 Feb 2025 11:35:10 +0100 Subject: [PATCH 03/16] Add composite embedder --- crates/milli/src/vector/composite.rs | 280 +++++++++++++++++++++++++++ crates/milli/src/vector/error.rs | 80 ++++++++ crates/milli/src/vector/mod.rs | 2 + 3 files changed, 362 insertions(+) create mode 100644 crates/milli/src/vector/composite.rs diff --git a/crates/milli/src/vector/composite.rs b/crates/milli/src/vector/composite.rs new file mode 100644 index 000000000..d174232bf --- /dev/null +++ b/crates/milli/src/vector/composite.rs @@ -0,0 +1,280 @@ +use std::time::Instant; + 
+use arroy::Distance; + +use super::error::CompositeEmbedderContainsHuggingFace; +use super::{ + hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, NewEmbedderError, +}; +use crate::ThreadPoolNoAbort; + +#[derive(Debug)] +pub enum SubEmbedder { + /// An embedder based on running local models, fetched from the Hugging Face Hub. + HuggingFace(hf::Embedder), + /// An embedder based on making embedding queries against the OpenAI API. + OpenAi(openai::Embedder), + /// An embedder based on the user providing the embeddings in the documents and queries. + UserProvided(manual::Embedder), + /// An embedder based on making embedding queries against an embedding server. + Ollama(ollama::Embedder), + /// An embedder based on making embedding queries against a generic JSON/REST embedding server. + Rest(rest::Embedder), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub enum SubEmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), +} + +impl SubEmbedderOptions { + pub fn distribution(&self) -> Option { + match self { + SubEmbedderOptions::HuggingFace(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::OpenAi(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::Ollama(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::UserProvided(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::Rest(embedder_options) => embedder_options.distribution, + } + } +} + +#[derive(Debug)] +pub struct Embedder { + pub(super) search: SubEmbedder, + pub(super) index: SubEmbedder, +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub struct EmbedderOptions { + pub search: SubEmbedderOptions, + pub index: SubEmbedderOptions, +} + +impl Embedder { + pub fn new( + 
EmbedderOptions { search, index }: EmbedderOptions, + ) -> Result { + let search = SubEmbedder::new(search)?; + let index = SubEmbedder::new(index)?; + + // check dimensions + if search.dimensions() != index.dimensions() { + return Err(NewEmbedderError::composite_dimensions_mismatch( + search.dimensions(), + index.dimensions(), + )); + } + // check similarity + let search_embeddings = search + .embed( + vec![ + "test".into(), + "a brave dog".into(), + "This is a sample text. It is meant to compare similarity.".into(), + ], + None, + ) + .map_err(|error| NewEmbedderError::composite_test_embedding_failed(error, "search"))?; + + let index_embeddings = index + .embed( + vec![ + "test".into(), + "a brave dog".into(), + "This is a sample text. It is meant to compare similarity.".into(), + ], + None, + ) + .map_err(|error| { + NewEmbedderError::composite_test_embedding_failed(error, "indexing") + })?; + + let hint = configuration_hint(&search, &index); + + check_similarity(search_embeddings, index_embeddings, hint)?; + + Ok(Self { search, index }) + } + + /// Indicates the dimensions of a single embedding produced by the embedder. + pub fn dimensions(&self) -> usize { + // can use the dimensions of any embedder since they should match + self.index.dimensions() + } + + /// An optional distribution used to apply an affine transformation to the similarity score of a document. + pub fn distribution(&self) -> Option { + // 3 cases here: + // 1. distribution provided by user => use that one, which was stored in search + // 2. no user-provided distribution, distribution in search embedder => use that one + // 3. 
no user-provided distribution, no distribution in search embedder => use the distribution in indexing embedder + self.search.distribution().or_else(|| self.index.distribution()) + } +} + +impl SubEmbedder { + pub fn new(options: SubEmbedderOptions) -> std::result::Result { + Ok(match options { + SubEmbedderOptions::HuggingFace(options) => { + Self::HuggingFace(hf::Embedder::new(options)?) + } + SubEmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), + SubEmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?), + SubEmbedderOptions::UserProvided(options) => { + Self::UserProvided(manual::Embedder::new(options)) + } + SubEmbedderOptions::Rest(options) => { + Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?) + } + }) + } + + pub fn embed( + &self, + texts: Vec, + deadline: Option, + ) -> std::result::Result, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed(texts), + SubEmbedder::OpenAi(embedder) => embedder.embed(&texts, deadline), + SubEmbedder::Ollama(embedder) => embedder.embed(&texts, deadline), + SubEmbedder::UserProvided(embedder) => embedder.embed(&texts), + SubEmbedder::Rest(embedder) => embedder.embed(texts, deadline), + } + } + + /// Embed multiple chunks of texts. + /// + /// Each chunk is composed of one or multiple texts. + pub fn embed_index( + &self, + text_chunks: Vec>, + threads: &ThreadPoolNoAbort, + ) -> std::result::Result>, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks), + SubEmbedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::UserProvided(embedder) => embedder.embed_index(text_chunks), + SubEmbedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), + } + } + + /// Non-owning variant of [`Self::embed_index`]. 
+ pub fn embed_index_ref( + &self, + texts: &[&str], + threads: &ThreadPoolNoAbort, + ) -> std::result::Result, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts), + SubEmbedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::UserProvided(embedder) => embedder.embed_index_ref(texts), + SubEmbedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), + } + } + + /// Indicates the preferred number of chunks to pass to [`Self::embed_index`] + pub fn chunk_count_hint(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.chunk_count_hint(), + SubEmbedder::OpenAi(embedder) => embedder.chunk_count_hint(), + SubEmbedder::Ollama(embedder) => embedder.chunk_count_hint(), + SubEmbedder::UserProvided(_) => 100, + SubEmbedder::Rest(embedder) => embedder.chunk_count_hint(), + } + } + + /// Indicates the preferred number of texts in a single chunk passed to [`Self::embed`] + pub fn prompt_count_in_chunk_hint(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::UserProvided(_) => 1, + SubEmbedder::Rest(embedder) => embedder.prompt_count_in_chunk_hint(), + } + } + + pub fn uses_document_template(&self) -> bool { + match self { + SubEmbedder::HuggingFace(_) + | SubEmbedder::OpenAi(_) + | SubEmbedder::Ollama(_) + | SubEmbedder::Rest(_) => true, + SubEmbedder::UserProvided(_) => false, + } + } + + /// Indicates the dimensions of a single embedding produced by the embedder. 
+ pub fn dimensions(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.dimensions(), + SubEmbedder::OpenAi(embedder) => embedder.dimensions(), + SubEmbedder::Ollama(embedder) => embedder.dimensions(), + SubEmbedder::UserProvided(embedder) => embedder.dimensions(), + SubEmbedder::Rest(embedder) => embedder.dimensions(), + } + } + + /// An optional distribution used to apply an affine transformation to the similarity score of a document. + pub fn distribution(&self) -> Option { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.distribution(), + SubEmbedder::OpenAi(embedder) => embedder.distribution(), + SubEmbedder::Ollama(embedder) => embedder.distribution(), + SubEmbedder::UserProvided(embedder) => embedder.distribution(), + SubEmbedder::Rest(embedder) => embedder.distribution(), + } + } +} + +fn check_similarity( + left: Vec, + right: Vec, + hint: CompositeEmbedderContainsHuggingFace, +) -> Result<(), NewEmbedderError> { + if left.len() != right.len() { + return Err(NewEmbedderError::composite_embedding_count_mismatch(left.len(), right.len())); + } + + for (left, right) in left.into_iter().zip(right) { + let left = arroy::internals::UnalignedVector::from_slice(&left); + let right = arroy::internals::UnalignedVector::from_slice(&right); + let left = arroy::internals::Leaf { + header: arroy::distances::Cosine::new_header(&left), + vector: left, + }; + let right = arroy::internals::Leaf { + header: arroy::distances::Cosine::new_header(&right), + vector: right, + }; + + let distance = arroy::distances::Cosine::built_distance(&left, &right); + + if distance > super::MAX_COMPOSITE_DISTANCE { + return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint)); + } + } + Ok(()) +} + +fn configuration_hint( + search: &SubEmbedder, + index: &SubEmbedder, +) -> CompositeEmbedderContainsHuggingFace { + match (search, index) { + (SubEmbedder::HuggingFace(_), SubEmbedder::HuggingFace(_)) => { + 
CompositeEmbedderContainsHuggingFace::Both + } + (SubEmbedder::HuggingFace(_), _) => CompositeEmbedderContainsHuggingFace::Search, + (_, SubEmbedder::HuggingFace(_)) => CompositeEmbedderContainsHuggingFace::Indexing, + _ => CompositeEmbedderContainsHuggingFace::None, + } +} diff --git a/crates/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs index 650249bff..0993ded1d 100644 --- a/crates/milli/src/vector/error.rs +++ b/crates/milli/src/vector/error.rs @@ -6,6 +6,7 @@ use hf_hub::api::sync::ApiError; use super::parsed_vectors::ParsedVectorsDiff; use super::rest::ConfigurationSource; +use super::MAX_COMPOSITE_DISTANCE; use crate::error::FaultSource; use crate::update::new::vector_document::VectorDocument; use crate::{FieldDistribution, PanicCatched}; @@ -335,6 +336,77 @@ impl NewEmbedderError { pub(crate) fn ollama_unsupported_url(url: String) -> NewEmbedderError { Self { kind: NewEmbedderErrorKind::OllamaUnsupportedUrl(url), fault: FaultSource::User } } + + pub(crate) fn composite_dimensions_mismatch( + search_dimensions: usize, + index_dimensions: usize, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeDimensionsMismatch { + search_dimensions, + index_dimensions, + }, + fault: FaultSource::User, + } + } + + pub(crate) fn composite_test_embedding_failed( + inner: EmbedError, + failing_embedder: &'static str, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeTestEmbeddingFailed { inner, failing_embedder }, + fault: FaultSource::Runtime, + } + } + + pub(crate) fn composite_embedding_count_mismatch( + search_count: usize, + index_count: usize, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeEmbeddingCountMismatch { + search_count, + index_count, + }, + fault: FaultSource::Runtime, + } + } + + pub(crate) fn composite_embedding_value_mismatch( + distance: f32, + hint: CompositeEmbedderContainsHuggingFace, + ) -> NewEmbedderError { + Self { + kind: 
NewEmbedderErrorKind::CompositeEmbeddingValueMismatch { distance, hint }, + fault: FaultSource::User, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum CompositeEmbedderContainsHuggingFace { + Both, + Search, + Indexing, + None, +} + +impl std::fmt::Display for CompositeEmbedderContainsHuggingFace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CompositeEmbedderContainsHuggingFace::Both => f.write_str( + "\n - Make sure the `model`, `revision` and `pooling` of both embedders match.", + ), + CompositeEmbedderContainsHuggingFace::Search => f.write_str( + "\n - Consider trying a different `pooling` method for the search embedder.", + ), + CompositeEmbedderContainsHuggingFace::Indexing => f.write_str( + "\n - Consider trying a different `pooling` method for the indexing embedder.", + ), + CompositeEmbedderContainsHuggingFace::None => Ok(()), + } + } } #[derive(Debug, thiserror::Error)] @@ -419,6 +491,14 @@ pub enum NewEmbedderErrorKind { CouldNotParseTemplate(String), #[error("unsupported Ollama URL.\n - For `ollama` sources, the URL must end with `/api/embed` or `/api/embeddings`\n - Got `{0}`")] OllamaUnsupportedUrl(String), + #[error("error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: {search_dimensions}\n - Indexing time dimensions: {index_dimensions}\n - Note: Dimensions of embeddings produced by both embedders are required to match.")] + CompositeDimensionsMismatch { search_dimensions: usize, index_dimensions: usize }, + #[error("error while generating test embeddings.\n - could not generate test embedding with embedder at {failing_embedder} time.\n - Embedding failed with {inner}")] + CompositeTestEmbeddingFailed { inner: EmbedError, failing_embedder: &'static str }, + #[error("error while generating test embeddings.\n - the number of generated embeddings differs.\n - {search_count} embeddings for the search 
time embedder.\n - {index_count} embeddings for the indexing time embedder.")] + CompositeEmbeddingCountMismatch { search_count: usize, index_count: usize }, + #[error("error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance {distance}\n - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n - Note: check that both embedders produce similar embeddings.{hint}")] + CompositeEmbeddingValueMismatch { distance: f32, hint: CompositeEmbedderContainsHuggingFace }, } pub struct PossibleEmbeddingMistakes { diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 74b52b1fe..47307295e 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -15,6 +15,7 @@ use self::error::{EmbedError, NewEmbedderError}; use crate::prompt::{Prompt, PromptData}; use crate::ThreadPoolNoAbort; +pub mod composite; pub mod error; pub mod hf; pub mod json_template; @@ -31,6 +32,7 @@ pub use self::error::Error; pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01; pub struct ArroyWrapper { quantized: bool, From 4a2643daa2ee3b0e712478ebe8a33bf371602699 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 20 Feb 2025 11:36:42 +0100 Subject: [PATCH 04/16] Rename embed_one to embed_search and embed_chunks* to embed_index* --- .../src/scheduler/test_embedders.rs | 7 ++-- crates/meilisearch/src/search/mod.rs | 2 +- crates/milli/src/search/hybrid.rs | 2 +- .../extract/extract_vector_points.rs | 2 +- .../src/update/new/extract/vectors/mod.rs | 2 +- crates/milli/src/vector/hf.rs | 4 +-- crates/milli/src/vector/manual.rs | 4 +-- crates/milli/src/vector/mod.rs | 32 +++++++++++-------- crates/milli/src/vector/ollama.rs | 4 +-- crates/milli/src/vector/openai.rs | 4 +-- crates/milli/src/vector/rest.rs | 4 +-- 11 files changed, 36 insertions(+), 31 deletions(-) diff --git 
a/crates/index-scheduler/src/scheduler/test_embedders.rs b/crates/index-scheduler/src/scheduler/test_embedders.rs index b1c619441..05929b651 100644 --- a/crates/index-scheduler/src/scheduler/test_embedders.rs +++ b/crates/index-scheduler/src/scheduler/test_embedders.rs @@ -104,9 +104,10 @@ fn import_vectors() { let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); - let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); - let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); - let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); + let beagle_embed = + hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap(); + let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap(); + let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap(); (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) }; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 2091047fc..565dbccf1 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -916,7 +916,7 @@ fn prepare_search<'t>( let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); embedder - .embed_one(query.q.clone().unwrap(), Some(deadline)) + .embed_search(query.q.clone().unwrap(), Some(deadline)) .map_err(milli::vector::Error::from) .map_err(milli::Error::from)? 
} diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index 368d61833..a1c8b71da 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -203,7 +203,7 @@ impl<'a> Search<'a> { let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3); - match embedder.embed_one(query, Some(deadline)) { + match embedder.embed_search(query, Some(deadline)) { Ok(embedding) => embedding, Err(error) => { tracing::error!(error=%error, "Embedding failed"); diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 9103e8324..642cd610a 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -795,7 +795,7 @@ fn embed_chunks( unused_vectors_distribution: &UnusedVectorsDistribution, request_threads: &ThreadPoolNoAbort, ) -> Result>> { - match embedder.embed_chunks(text_chunks, request_threads) { + match embedder.embed_index(text_chunks, request_threads) { Ok(chunks) => Ok(chunks), Err(error) => { if let FaultSource::Bug = error.fault { diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index b268647c2..6820ee67b 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> { return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))); } - let res = match embedder.embed_chunks_ref(texts.as_slice(), threads) { + let res = match embedder.embed_index_ref(texts.as_slice(), threads) { Ok(embeddings) => { for (docid, embedding) in ids.into_iter().zip(embeddings) { sender.set_vector(*docid, embedder_id, embedding).unwrap(); diff --git a/crates/milli/src/vector/hf.rs 
b/crates/milli/src/vector/hf.rs index b01a66255..3ec0a5b7c 100644 --- a/crates/milli/src/vector/hf.rs +++ b/crates/milli/src/vector/hf.rs @@ -346,7 +346,7 @@ impl Embedder { Ok(embedding) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, ) -> std::result::Result>, EmbedError> { @@ -378,7 +378,7 @@ impl Embedder { }) } - pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result, EmbedError> { + pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result, EmbedError> { texts.iter().map(|text| self.embed_one(text)).collect() } } diff --git a/crates/milli/src/vector/manual.rs b/crates/milli/src/vector/manual.rs index 8c2ef97b2..b95bf0ea2 100644 --- a/crates/milli/src/vector/manual.rs +++ b/crates/milli/src/vector/manual.rs @@ -30,7 +30,7 @@ impl Embedder { self.dimensions } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, ) -> Result>, EmbedError> { @@ -41,7 +41,7 @@ impl Embedder { self.distribution } - pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result, EmbedError> { + pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result, EmbedError> { texts.iter().map(|text| self.embed_one(text)).collect() } } diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 47307295e..d5569a8e6 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -628,13 +628,16 @@ impl Embedder { EmbedderOptions::Rest(options) => { Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?) } + EmbedderOptions::Composite(options) => { + Self::Composite(composite::Embedder::new(options)?) + } }) } /// Embed one or multiple texts. /// /// Each text can be embedded as one or multiple embeddings. 
- pub fn embed( + fn embed( &self, texts: Vec, deadline: Option, @@ -649,7 +652,7 @@ impl Embedder { } #[tracing::instrument(level = "debug", skip_all, target = "search")] - pub fn embed_one( + pub fn embed_search( &self, text: String, deadline: Option, @@ -662,31 +665,32 @@ impl Embedder { /// Embed multiple chunks of texts. /// /// Each chunk is composed of one or multiple texts. - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, ) -> std::result::Result>, EmbedError> { match self { - Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), - Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks, threads), - Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks, threads), - Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), - Embedder::Rest(embedder) => embedder.embed_chunks(text_chunks, threads), + Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks), + Embedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks), + Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), } } - pub fn embed_chunks_ref( + /// Non-owning variant of [`Self::embed_index`]. 
+ pub fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, ) -> std::result::Result, EmbedError> { match self { - Embedder::HuggingFace(embedder) => embedder.embed_chunks_ref(texts), - Embedder::OpenAi(embedder) => embedder.embed_chunks_ref(texts, threads), - Embedder::Ollama(embedder) => embedder.embed_chunks_ref(texts, threads), - Embedder::UserProvided(embedder) => embedder.embed_chunks_ref(texts), - Embedder::Rest(embedder) => embedder.embed_chunks_ref(texts, threads), + Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts), + Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts), + Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), } } diff --git a/crates/milli/src/vector/ollama.rs b/crates/milli/src/vector/ollama.rs index d2a80d6b5..130e90cee 100644 --- a/crates/milli/src/vector/ollama.rs +++ b/crates/milli/src/vector/ollama.rs @@ -113,7 +113,7 @@ impl Embedder { } } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -134,7 +134,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, diff --git a/crates/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs index c7aec5d93..8a5e6266a 100644 --- a/crates/milli/src/vector/openai.rs +++ b/crates/milli/src/vector/openai.rs @@ -250,7 +250,7 @@ impl Embedder { Ok(all_embeddings) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -271,7 +271,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, diff --git a/crates/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs index 467169d9c..a31bc5d2f 100644 --- 
a/crates/milli/src/vector/rest.rs +++ b/crates/milli/src/vector/rest.rs @@ -184,7 +184,7 @@ impl Embedder { Ok(embeddings.pop().unwrap()) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -205,7 +205,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, From 294cf39cad33a127537c6fdd82331e8eba3b19ba Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 20 Feb 2025 11:37:27 +0100 Subject: [PATCH 05/16] Integrate composite embedder --- crates/milli/src/vector/mod.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index d5569a8e6..a253963d2 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -538,6 +538,8 @@ pub enum Embedder { Ollama(ollama::Embedder), /// An embedder based on making embedding queries against a generic JSON/REST embedding server. Rest(rest::Embedder), + /// An embedder composed of an embedder at search time and an embedder at indexing time. + Composite(composite::Embedder), } /// Configuration for an embedder. 
@@ -607,6 +609,7 @@ pub enum EmbedderOptions { Ollama(ollama::EmbedderOptions), UserProvided(manual::EmbedderOptions), Rest(rest::EmbedderOptions), + Composite(composite::EmbedderOptions), } impl Default for EmbedderOptions { @@ -648,6 +651,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.embed(&texts, deadline), Embedder::UserProvided(embedder) => embedder.embed(&texts), Embedder::Rest(embedder) => embedder.embed(texts, deadline), + Embedder::Composite(embedder) => embedder.search.embed(texts, deadline), } } @@ -676,6 +680,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks), Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::Composite(embedder) => embedder.index.embed_index(text_chunks, threads), } } @@ -691,6 +696,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts), Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::Composite(embedder) => embedder.index.embed_index_ref(texts, threads), } } @@ -702,6 +708,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.chunk_count_hint(), Embedder::UserProvided(_) => 100, Embedder::Rest(embedder) => embedder.chunk_count_hint(), + Embedder::Composite(embedder) => embedder.index.chunk_count_hint(), } } @@ -713,6 +720,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::UserProvided(_) => 1, Embedder::Rest(embedder) => embedder.prompt_count_in_chunk_hint(), + Embedder::Composite(embedder) => embedder.index.prompt_count_in_chunk_hint(), } } @@ -724,6 +732,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.dimensions(), Embedder::UserProvided(embedder) => embedder.dimensions(), Embedder::Rest(embedder) => embedder.dimensions(), + Embedder::Composite(embedder) => 
embedder.dimensions(), } } @@ -735,6 +744,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.distribution(), Embedder::UserProvided(embedder) => embedder.distribution(), Embedder::Rest(embedder) => embedder.distribution(), + Embedder::Composite(embedder) => embedder.distribution(), } } @@ -745,6 +755,7 @@ impl Embedder { | Embedder::Ollama(_) | Embedder::Rest(_) => true, Embedder::UserProvided(_) => false, + Embedder::Composite(embedder) => embedder.index.uses_document_template(), } } } From 3cdcc54a9ea8c70b75fc7309bf90dfbe67aa6ea4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 13:55:13 +0100 Subject: [PATCH 06/16] analytics --- crates/meilisearch/src/routes/indexes/settings_analytics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index ffeadcab6..4944349a4 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -512,6 +512,7 @@ impl EmbeddersAnalytics { EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), EmbedderSource::Ollama => sources.insert("ollama".to_string()), EmbedderSource::Rest => sources.insert("rest".to_string()), + EmbedderSource::Composite => sources.insert("composite".to_string()), }; } }; From b85180fedb410a90ce0af2fd88d44de35d81c249 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 13:55:40 +0100 Subject: [PATCH 07/16] Error types --- crates/meilisearch-types/src/error.rs | 7 +-- crates/milli/src/error.rs | 64 ++++++++++++++++++++------- 2 files changed, 53 insertions(+), 18 deletions(-) diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index f64301b8c..5a0451b6c 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -428,9 +428,10 @@ impl ErrorCode for milli::Error { | 
UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } | UserError::InvalidPrompt(_) - | UserError::InvalidDisableBinaryQuantization { .. } => { - Code::InvalidSettingsEmbedders - } + | UserError::InvalidDisableBinaryQuantization { .. } + | UserError::InvalidSourceForNested { .. } + | UserError::MissingSourceForNested { .. } + | UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index c8ed1912f..c977362d6 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -13,6 +13,7 @@ use thiserror::Error; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::{self, DocumentsBatchCursorError}; use crate::thread_pool_no_abort::PanicCatched; +use crate::vector::settings::EmbeddingSettings; use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; pub fn is_reserved_keyword(keyword: &str) -> bool { @@ -229,28 +230,52 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidSimilarEmbedder(String), #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")] TooManyVectors(String, usize), - #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). 
Available fields: {}", - allowed_sources_for_field - .iter() - .map(|accepted| format!("`{}`", accepted)) - .collect::>() - .join(", "), - allowed_fields_for_source - .iter() - .map(|accepted| format!("`{}`", accepted)) - .collect::>() - .join(", ") + #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}`{for_context}.{available_sources}{available_fields}{available_contexts}", + field=field.name(), + for_context={ + context.in_context() + }, + available_sources={ + let allowed_sources_for_field = EmbeddingSettings::allowed_sources_for_field(*field, *context); + if allowed_sources_for_field.is_empty() { + String::new() + } else { + format!("\n - note: `{}` is available for sources: {}", + field.name(), + allowed_sources_for_field + .iter() + .map(|accepted| format!("`{}`", accepted)) + .collect::>() + .join(", "), + ) + } + }, + available_fields={ + let allowed_fields_for_source = EmbeddingSettings::allowed_fields_for_source(*source_, *context); + format!("\n - note: available fields for source `{source_}`{}: {}",context.in_context(), allowed_fields_for_source + .iter() + .map(|accepted| format!("`{}`", accepted)) + .collect::>() + .join(", "),) + }, + available_contexts={ + let available_not_nested = !matches!(EmbeddingSettings::field_status(*source_, *field, crate::vector::settings::NestingContext::NotNested), crate::vector::settings::FieldStatus::Disallowed); + if available_not_nested { + format!("\n - note: `{}` is available when source `{source_}` is not{}", field.name(), context.in_context()) + } else { + String::new() + } + } )] InvalidFieldForSource { embedder_name: String, source_: crate::vector::settings::EmbedderSource, - field: &'static str, - allowed_fields_for_source: &'static [&'static str], - allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource], + context: crate::vector::settings::NestingContext, + field: crate::vector::settings::MetaEmbeddingSetting, }, 
#[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())] InvalidOpenAiModel { embedder_name: String, model: String }, - #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")] + #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source `{source_}`)")] MissingFieldForSource { field: &'static str, source_: crate::vector::settings::EmbedderSource, @@ -270,6 +295,15 @@ and can not be more than 511 bytes.", .document_id.to_string() dimensions: usize, max_dimensions: usize, }, + #[error("`.embedders.{embedder_name}.source`: Source `{source_}` is not available in a nested embedder")] + InvalidSourceForNested { + embedder_name: String, + source_: crate::vector::settings::EmbedderSource, + }, + #[error("`.embedders.{embedder_name}`: Missing field `source`.\n - note: this field is mandatory for nested embedders")] + MissingSourceForNested { embedder_name: String }, + #[error("`.embedders.{embedder_name}`: {message}")] + InvalidSettingsEmbedder { embedder_name: String, message: String }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, #[error( From 9f3e4801b1da8fba4ae5f9b99382b613e5c00f2a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 13:56:58 +0100 Subject: [PATCH 08/16] Refactor settings validation and introduce SubEmbedderSettings --- crates/milli/src/update/settings.rs | 238 ++-- crates/milli/src/vector/settings.rs | 1813 +++++++++++++++++++++------ 2 files changed, 1560 insertions(+), 491 deletions(-) diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 11682177f..315988e98 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -27,8 +27,8 @@ use 
crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{ - check_set, check_unset, EmbedderAction, EmbedderSource, EmbeddingSettings, ReindexAction, - WriteBackToDocuments, + EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction, + SubEmbeddingSettings, WriteBackToDocuments, }; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result}; @@ -1669,26 +1669,12 @@ fn embedders(embedding_configs: Vec) -> Result, -) -> Result> { - match new { - Setting::Set(EmbeddingSettings { - source, - model, - revision, - pooling, - api_key, - dimensions, - document_template: Setting::Set(template), - document_template_max_bytes, - url, - request, - response, - distribution, - headers, - binary_quantized: binary_quantize, - }) => { - let max_bytes = match document_template_max_bytes.set() { + new_prompt: Setting, + max_bytes: Setting, +) -> Result> { + match new_prompt { + Setting::Set(template) => { + let max_bytes = match max_bytes.set() { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes { embedder_name: name.to_owned(), @@ -1706,22 +1692,7 @@ fn validate_prompt( .map(|prompt| crate::prompt::PromptData::from(prompt).template) .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; - Ok(Setting::Set(EmbeddingSettings { - source, - model, - revision, - pooling, - api_key, - dimensions, - document_template: Setting::Set(template), - document_template_max_bytes, - url, - request, - response, - distribution, - headers, - binary_quantized: binary_quantize, - })) + Ok(Setting::Set(template)) } new => Ok(new), } @@ -1731,7 +1702,6 @@ pub fn validate_embedding_settings( settings: Setting, name: &str, ) -> Result> { - let settings = 
validate_prompt(name, settings)?; let Setting::Set(settings) = settings else { return Ok(settings) }; let EmbeddingSettings { source, @@ -1745,11 +1715,15 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + mut indexing_embedder, distribution, headers, binary_quantized: binary_quantize, } = settings; + let document_template = validate_prompt(name, document_template, document_template_max_bytes)?; + if let Some(0) = dimensions.set() { return Err(crate::error::UserError::InvalidSettingsDimensions { embedder_name: name.to_owned(), @@ -1775,6 +1749,7 @@ pub fn validate_embedding_settings( } let Some(inferred_source) = source.set() else { + // we are validating the fused settings, so we always have a source return Ok(Setting::Set(EmbeddingSettings { source, model, @@ -1787,20 +1762,35 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + indexing_embedder, distribution, headers, binary_quantized: binary_quantize, })); }; + EmbeddingSettings::check_settings( + name, + inferred_source, + NestingContext::NotNested, + &model, + &revision, + &pooling, + &dimensions, + &api_key, + &url, + &request, + &response, + &document_template, + &document_template_max_bytes, + &headers, + &search_embedder, + &indexing_embedder, + &binary_quantize, + &distribution, + )?; match inferred_source { EmbedderSource::OpenAi => { - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; - - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - if let Setting::Set(model) = &model { let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) .ok_or(crate::error::UserError::InvalidOpenAiModel { @@ -1831,55 +1821,117 @@ pub fn 
validate_embedding_settings( } } } - EmbedderSource::Ollama => { - check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; + EmbedderSource::Ollama + | EmbedderSource::HuggingFace + | EmbedderSource::UserProvided + | EmbedderSource::Rest => {} + EmbedderSource::Composite => { + if let Setting::Set(embedder) = &search_embedder { + if let Some(source) = embedder.source.set() { + let search_embedder = match embedder.search_embedder.clone() { + Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder( + search_embedder, + name, + NestingContext::Search, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + let indexing_embedder = match embedder.indexing_embedder.clone() { + Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder( + indexing_embedder, + name, + NestingContext::Search, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + EmbeddingSettings::check_nested_source(name, source, NestingContext::Search)?; + EmbeddingSettings::check_settings( + name, + source, + NestingContext::Search, + &embedder.model, + &embedder.revision, + &embedder.pooling, + &embedder.dimensions, + &embedder.api_key, + &embedder.url, + &embedder.request, + &embedder.response, + &embedder.document_template, + &embedder.document_template_max_bytes, + &embedder.headers, + &search_embedder, + &indexing_embedder, + &embedder.binary_quantized, + &embedder.distribution, + )?; + } else { + return Err(UserError::MissingSourceForNested { + embedder_name: NestingContext::Search.embedder_name_with_context(name), + } + .into()); + } + } - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, 
EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::HuggingFace => { - check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; - check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; + indexing_embedder = if let Setting::Set(mut embedder) = indexing_embedder { + embedder.document_template = validate_prompt( + name, + embedder.document_template, + embedder.document_template_max_bytes, + )?; - check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::UserProvided => { - check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; - check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; - check_unset( - &document_template, - EmbeddingSettings::DOCUMENT_TEMPLATE, - inferred_source, - name, - )?; - check_unset( - &document_template_max_bytes, - EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES, - inferred_source, - name, - )?; - check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; - - check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::Rest => { - check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; - 
check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + if let Some(source) = embedder.source.set() { + let search_embedder = match embedder.search_embedder.clone() { + Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder( + search_embedder, + name, + NestingContext::Indexing, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + let indexing_embedder = match embedder.indexing_embedder.clone() { + Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder( + indexing_embedder, + name, + NestingContext::Indexing, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + EmbeddingSettings::check_nested_source(name, source, NestingContext::Indexing)?; + EmbeddingSettings::check_settings( + name, + source, + NestingContext::Indexing, + &embedder.model, + &embedder.revision, + &embedder.pooling, + &embedder.dimensions, + &embedder.api_key, + &embedder.url, + &embedder.request, + &embedder.response, + &embedder.document_template, + &embedder.document_template_max_bytes, + &embedder.headers, + &search_embedder, + &indexing_embedder, + &embedder.binary_quantized, + &embedder.distribution, + )?; + } else { + return Err(UserError::MissingSourceForNested { + embedder_name: NestingContext::Indexing.embedder_name_with_context(name), + } + .into()); + } + Setting::Set(embedder) + } else { + indexing_embedder + }; } } Ok(Setting::Set(EmbeddingSettings { diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index 4e9997028..610597dd5 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -6,8 +6,9 @@ use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; +use super::composite::SubEmbedderOptions; use 
super::hf::OverridePooling; -use super::{ollama, openai, DistributionShift}; +use super::{ollama, openai, DistributionShift, EmbedderOptions}; use crate::prompt::{default_max_bytes, PromptData}; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -265,6 +266,17 @@ pub struct EmbeddingSettings { /// /// - 🌱 Changing the value of this parameter never regenerates embeddings pub headers: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub search_embedder: Setting, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub indexing_embedder: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] @@ -280,23 +292,254 @@ pub struct EmbeddingSettings { pub distribution: Setting, } -pub fn check_unset( - key: &Setting, - field: &'static str, - source: EmbedderSource, - embedder_name: &str, -) -> Result<(), UserError> { - if matches!(key, Setting::NotSet) { - Ok(()) - } else { - Err(UserError::InvalidFieldForSource { - embedder_name: embedder_name.to_owned(), - source_: source, - field, - allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source), - allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field), - }) - } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub struct SubEmbeddingSettings { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The source used to provide the embeddings. + /// + /// Which embedder parameters are available and mandatory is determined by the value of this setting.
+ /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - Defaults to `openAi` + pub source: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The name of the model to use. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `ollama` + /// + /// # Availability + /// + /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - For source `openAi`, defaults to `text-embedding-3-small` + /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` + pub model: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The revision (commit SHA1) of the model to use. + /// + /// If unspecified, Meilisearch picks the latest revision of the model. + /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` + /// - Otherwise, defaults to `null` + pub revision: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The pooling method to use.
+ /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - Defaults to `useModel` + /// + /// # Compatibility Note + /// + /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. + pub pooling: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The API key to pass to the remote embedder while making requests. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`. + /// - For other sources, no bearer token is sent if this parameter is not set. + /// + /// # Note + /// + /// - This setting is partially hidden when returned by the settings + pub api_key: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The expected dimensions of the embeddings produced by this embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `userProvided` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings + /// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the dimensions is the maximum allowed by the model.
+ /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. + pub dimensions: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// A liquid template used to render documents to a text that can be embedded. + /// + /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder. + /// The embedder then generates document vectors based on this text. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. + pub document_template: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Rendered texts are truncated to this size. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text. + /// - 🌱 When decreased, embeddings are never regenerated + /// + /// # Default + /// + /// - Defaults to 400 + pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// URL to reach the remote embedder.
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 When modified for source `openAi`, embeddings are never regenerated + /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated + pub url: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Template request to send to the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + pub request: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Template response indicating how to find the embeddings in the response from the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + pub response: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + /// Additional headers to send to the remote embedder.
+ /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + pub headers: Setting>, + + // The following fields are provided for the sake of improving error handling + // They should always be set to `NotSet`, otherwise an error will be returned + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub distribution: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub binary_quantized: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub search_embedder: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub indexing_embedder: Setting, } /// Indicates what action should take place during a reindexing operation for an embedder @@ -381,6 +624,8 @@ impl SettingsDiff { mut url, mut request, mut response, + mut search_embedder, + mut indexing_embedder, mut distribution, mut headers, mut document_template_max_bytes, @@ -398,6 +643,8 @@ impl SettingsDiff { url: new_url, request: new_request, response: new_response, + search_embedder: new_search_embedder, + indexing_embedder: new_indexing_embedder, distribution: new_distribution, headers: new_headers, document_template_max_bytes: new_document_template_max_bytes, @@ -414,93 +661,45 @@ impl SettingsDiff { let mut reindex_action = None; - // **Warning**: do not use short-circuiting || here, we want all these operations applied - if source.apply(new_source) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - // when the source changes, we need to reapply the default settings for the new source - apply_default_for_source( - &source, - &mut model, - &mut revision, - &mut pooling, - &mut dimensions, - &mut url, - &mut request, - &mut response, - &mut document_template, - &mut document_template_max_bytes, -
&mut headers, - ) - } - if model.apply(new_model) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if revision.apply(new_revision) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if pooling.apply(new_pooling) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if dimensions.apply(new_dimensions) { - match source { - // regenerate on dimensions change in OpenAI since truncation is supported - Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::FullReindex, - ); - } - // for all other embedders, the parameter is a hint that should not be able to change the result - // and so won't cause a reindex by itself. - _ => {} - } - } + Self::diff( + &mut reindex_action, + &mut source, + &mut model, + &mut revision, + &mut pooling, + &mut api_key, + &mut dimensions, + &mut document_template, + &mut document_template_max_bytes, + &mut url, + &mut request, + &mut response, + &mut headers, + new_source, + new_model, + new_revision, + new_pooling, + new_api_key, + new_dimensions, + new_document_template, + new_document_template_max_bytes, + new_url, + new_request, + new_response, + new_headers, + ); + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); - if url.apply(new_url) { - match source { - // do not regenerate on an url change in OpenAI - Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {} - _ => { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::FullReindex, - ); - } - } - } - if request.apply(new_request) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if response.apply(new_response) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if document_template.apply(new_document_template) { - ReindexAction::push_action( - &mut reindex_action, - 
ReindexAction::RegeneratePrompts, - ); - } - if document_template_max_bytes.apply(new_document_template_max_bytes) { - let previous_document_template_max_bytes = - document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); - let new_document_template_max_bytes = - new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); - - // only reindex if the size increased. Reasoning: - // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors - // - size increase is an accuracy optimization, so we want to reindex - if new_document_template_max_bytes > previous_document_template_max_bytes { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::RegeneratePrompts, - ) - } - } + // changes to the *search* embedder never trigger any reindexing + search_embedder.apply(new_search_embedder); + indexing_embedder = Self::from_sub_settings( + indexing_embedder, + new_indexing_embedder, + &mut reindex_action, + )?; distribution.apply(new_distribution); - api_key.apply(new_api_key); - headers.apply(new_headers); let updated_settings = EmbeddingSettings { source, @@ -513,6 +712,8 @@ impl SettingsDiff { url, request, response, + search_embedder, + indexing_embedder, distribution, headers, document_template_max_bytes, @@ -538,6 +739,223 @@ impl SettingsDiff { }; Ok(ret) } + + fn from_sub_settings( + sub_embedder: Setting, + new_sub_embedder: Setting, + reindex_action: &mut Option, + ) -> Result, UserError> { + let ret = match new_sub_embedder { + Setting::Set(new_sub_embedder) => { + let Setting::Set(SubEmbeddingSettings { + mut source, + mut model, + mut revision, + mut pooling, + mut api_key, + mut dimensions, + mut document_template, + mut document_template_max_bytes, + mut url, + mut request, + mut response, + mut headers, + // phony settings + mut distribution, + mut binary_quantized, + mut search_embedder, + mut indexing_embedder, + }) = sub_embedder + else { + // return the
new_indexing_embedder if the indexing_embedder was not set + // this should happen only when changing the source, so the decision to reindex is already taken. + return Ok(Setting::Set(new_sub_embedder)); + }; + + let SubEmbeddingSettings { + source: new_source, + model: new_model, + revision: new_revision, + pooling: new_pooling, + api_key: new_api_key, + dimensions: new_dimensions, + document_template: new_document_template, + document_template_max_bytes: new_document_template_max_bytes, + url: new_url, + request: new_request, + response: new_response, + headers: new_headers, + distribution: new_distribution, + binary_quantized: new_binary_quantized, + search_embedder: new_search_embedder, + indexing_embedder: new_indexing_embedder, + } = new_sub_embedder; + + Self::diff( + reindex_action, + &mut source, + &mut model, + &mut revision, + &mut pooling, + &mut api_key, + &mut dimensions, + &mut document_template, + &mut document_template_max_bytes, + &mut url, + &mut request, + &mut response, + &mut headers, + new_source, + new_model, + new_revision, + new_pooling, + new_api_key, + new_dimensions, + new_document_template, + new_document_template_max_bytes, + new_url, + new_request, + new_response, + new_headers, + ); + + // update phony settings, it is always an error to have them set. 
+ distribution.apply(new_distribution); + binary_quantized.apply(new_binary_quantized); + search_embedder.apply(new_search_embedder); + indexing_embedder.apply(new_indexing_embedder); + + let updated_settings = SubEmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + url, + request, + response, + headers, + document_template_max_bytes, + distribution, + binary_quantized, + search_embedder, + indexing_embedder, + }; + Setting::Set(updated_settings) + } + // handled during validation of the settings + Setting::Reset | Setting::NotSet => sub_embedder, + }; + Ok(ret) + } + + #[allow(clippy::too_many_arguments)] + fn diff( + reindex_action: &mut Option, + source: &mut Setting, + model: &mut Setting, + revision: &mut Setting, + pooling: &mut Setting, + api_key: &mut Setting, + dimensions: &mut Setting, + document_template: &mut Setting, + document_template_max_bytes: &mut Setting, + url: &mut Setting, + request: &mut Setting, + response: &mut Setting, + headers: &mut Setting>, + new_source: Setting, + new_model: Setting, + new_revision: Setting, + new_pooling: Setting, + new_api_key: Setting, + new_dimensions: Setting, + new_document_template: Setting, + new_document_template_max_bytes: Setting, + new_url: Setting, + new_request: Setting, + new_response: Setting, + new_headers: Setting>, + ) { + // **Warning**: do not use short-circuiting || here, we want all these operations applied + if source.apply(new_source) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + // when the source changes, we need to reapply the default settings for the new source + apply_default_for_source( + &*source, + model, + revision, + pooling, + dimensions, + url, + request, + response, + document_template, + document_template_max_bytes, + headers, + // send dummy values, the source cannot recursively be composite + &mut Setting::NotSet, + &mut Setting::NotSet, + ) + } + if model.apply(new_model) { + 
ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if revision.apply(new_revision) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if pooling.apply(new_pooling) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if dimensions.apply(new_dimensions) { + match *source { + // regenerate on dimensions change in OpenAI since truncation is supported + Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + // for all other embedders, the parameter is a hint that should not be able to change the result + // and so won't cause a reindex by itself. + _ => {} + } + } + if url.apply(new_url) { + match *source { + // do not regenerate on an url change in OpenAI + Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {} + _ => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + } + } + if request.apply(new_request) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if response.apply(new_response) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if document_template.apply(new_document_template) { + ReindexAction::push_action(reindex_action, ReindexAction::RegeneratePrompts); + } + + if document_template_max_bytes.apply(new_document_template_max_bytes) { + let previous_document_template_max_bytes = + document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + let new_document_template_max_bytes = + new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + + // only reindex if the size increased. 
Reasoning: + // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors + // - size increase is an accuracy optimization, so we want to reindex + if new_document_template_max_bytes > previous_document_template_max_bytes { + ReindexAction::push_action(reindex_action, ReindexAction::RegeneratePrompts) + } + } + + api_key.apply(new_api_key); + headers.apply(new_headers); + } } impl ReindexAction { @@ -563,6 +981,8 @@ fn apply_default_for_source( document_template: &mut Setting, document_template_max_bytes: &mut Setting, headers: &mut Setting>, + search_embedder: &mut Setting, + indexing_embedder: &mut Setting, ) { match source { Setting::Set(EmbedderSource::HuggingFace) => { @@ -574,6 +994,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::Ollama) => { *model = Setting::Reset; @@ -584,6 +1006,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { *model = Setting::Reset; @@ -594,6 +1018,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::Rest) => { *model = Setting::NotSet; @@ -604,6 +1030,8 @@ fn apply_default_for_source( *request = Setting::Reset; *response = Setting::Reset; *headers = Setting::Reset; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::UserProvided) => { *model = Setting::NotSet; @@ -616,148 +1044,374 @@ fn apply_default_for_source( *document_template = Setting::NotSet; *document_template_max_bytes = 
Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; + } + Setting::Set(EmbedderSource::Composite) => { + *model = Setting::NotSet; + *revision = Setting::NotSet; + *pooling = Setting::NotSet; + *dimensions = Setting::NotSet; + *url = Setting::NotSet; + *request = Setting::NotSet; + *response = Setting::NotSet; + *document_template = Setting::NotSet; + *document_template_max_bytes = Setting::NotSet; + *headers = Setting::NotSet; + *search_embedder = Setting::Reset; + *indexing_embedder = Setting::Reset; } Setting::NotSet => {} } } -pub fn check_set( - key: &Setting, - field: &'static str, - source: EmbedderSource, - embedder_name: &str, -) -> Result<(), UserError> { - if matches!(key, Setting::Set(_)) { - Ok(()) - } else { - Err(UserError::MissingFieldForSource { - field, - source_: source, - embedder_name: embedder_name.to_owned(), - }) +pub(crate) enum FieldStatus { + Mandatory, + Allowed, + Disallowed, +} + +#[derive(Debug, Clone, Copy)] +pub enum NestingContext { + NotNested, + Search, + Indexing, +} + +impl NestingContext { + pub fn embedder_name_with_context(&self, embedder_name: &str) -> String { + match self { + NestingContext::NotNested => embedder_name.to_string(), + NestingContext::Search => format!("{embedder_name}.searchEmbedder"), + NestingContext::Indexing => format!("{embedder_name}.indexingEmbedder",), + } + } + + pub fn in_context(&self) -> &'static str { + match self { + NestingContext::NotNested => "", + NestingContext::Search => " for the search embedder", + NestingContext::Indexing => " for the indexing embedder", + } + } + + pub fn nesting_embedders(&self) -> &'static str { + match self { + NestingContext::NotNested => "", + NestingContext::Search => { + "\n - note: nesting embedders in `searchEmbedder` is not allowed" + } + NestingContext::Indexing => { + "\n - note: nesting embedders in `indexingEmbedder` is not allowed" + } + } + } +} + +#[derive(Debug, Clone, Copy, 
enum_iterator::Sequence)] +pub enum MetaEmbeddingSetting { + Source, + Model, + Revision, + Pooling, + ApiKey, + Dimensions, + DocumentTemplate, + DocumentTemplateMaxBytes, + Url, + Request, + Response, + Headers, + SearchEmbedder, + IndexingEmbedder, + Distribution, + BinaryQuantized, +} + +impl MetaEmbeddingSetting { + pub(crate) fn name(&self) -> &'static str { + use MetaEmbeddingSetting::*; + match self { + Source => "source", + Model => "model", + Revision => "revision", + Pooling => "pooling", + ApiKey => "apiKey", + Dimensions => "dimensions", + DocumentTemplate => "documentTemplate", + DocumentTemplateMaxBytes => "documentTemplateMaxBytes", + Url => "url", + Request => "request", + Response => "response", + Headers => "headers", + SearchEmbedder => "searchEmbedder", + IndexingEmbedder => "indexingEmbedder", + Distribution => "distribution", + BinaryQuantized => "binaryQuantized", + } } } impl EmbeddingSettings { - pub const SOURCE: &'static str = "source"; - pub const MODEL: &'static str = "model"; - pub const REVISION: &'static str = "revision"; - pub const POOLING: &'static str = "pooling"; - pub const API_KEY: &'static str = "apiKey"; - pub const DIMENSIONS: &'static str = "dimensions"; - pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; - pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes"; + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_settings( + embedder_name: &str, + source: EmbedderSource, + context: NestingContext, + model: &Setting, + revision: &Setting, + pooling: &Setting, + dimensions: &Setting, + api_key: &Setting, + url: &Setting, + request: &Setting, + response: &Setting, + document_template: &Setting, + document_template_max_bytes: &Setting, + headers: &Setting>, + search_embedder: &Setting, + indexing_embedder: &Setting, + binary_quantized: &Setting, + distribution: &Setting, + ) -> Result<(), UserError> { + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::Model, 
context, model)?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Revision, + context, + revision, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Pooling, + context, + pooling, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Dimensions, + context, + dimensions, + )?; + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::ApiKey, context, api_key)?; + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::Url, context, url)?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Request, + context, + request, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Response, + context, + response, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::DocumentTemplate, + context, + document_template, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::DocumentTemplateMaxBytes, + context, + document_template_max_bytes, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Headers, + context, + headers, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::SearchEmbedder, + context, + search_embedder, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::IndexingEmbedder, + context, + indexing_embedder, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::BinaryQuantized, + context, + binary_quantized, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Distribution, + context, + distribution, + ) + } - pub const URL: &'static str = "url"; - pub const REQUEST: &'static str = "request"; - pub const RESPONSE: &'static str = "response"; - pub const HEADERS: &'static str = "headers"; + pub(crate) fn allowed_sources_for_field( + field: MetaEmbeddingSetting, + context: NestingContext, + ) -> Vec { + enum_iterator::all() + .filter(|source| 
{ + !matches!(Self::field_status(*source, field, context), FieldStatus::Disallowed) + }) + .collect() + } - pub const DISTRIBUTION: &'static str = "distribution"; + pub(crate) fn allowed_fields_for_source( + source: EmbedderSource, + context: NestingContext, + ) -> Vec<&'static str> { + enum_iterator::all() + .filter(|field| { + !matches!(Self::field_status(source, *field, context), FieldStatus::Disallowed) + }) + .map(|field| field.name()) + .collect() + } - pub const BINARY_QUANTIZED: &'static str = "binaryQuantized"; - - pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] { - match field { - Self::SOURCE => &[ - EmbedderSource::HuggingFace, - EmbedderSource::OpenAi, - EmbedderSource::UserProvided, - EmbedderSource::Rest, - EmbedderSource::Ollama, - ], - Self::MODEL => { - &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] - } - Self::REVISION => &[EmbedderSource::HuggingFace], - Self::POOLING => &[EmbedderSource::HuggingFace], - Self::API_KEY => { - &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] - } - Self::DIMENSIONS => &[ - EmbedderSource::OpenAi, - EmbedderSource::UserProvided, - EmbedderSource::Ollama, - EmbedderSource::Rest, - ], - Self::DOCUMENT_TEMPLATE | Self::DOCUMENT_TEMPLATE_MAX_BYTES => &[ - EmbedderSource::HuggingFace, - EmbedderSource::OpenAi, - EmbedderSource::Ollama, - EmbedderSource::Rest, - ], - Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi], - Self::REQUEST => &[EmbedderSource::Rest], - Self::RESPONSE => &[EmbedderSource::Rest], - Self::HEADERS => &[EmbedderSource::Rest], - Self::DISTRIBUTION => &[ - EmbedderSource::HuggingFace, - EmbedderSource::Ollama, - EmbedderSource::OpenAi, - EmbedderSource::Rest, - EmbedderSource::UserProvided, - ], - Self::BINARY_QUANTIZED => &[ - EmbedderSource::HuggingFace, - EmbedderSource::Ollama, - EmbedderSource::OpenAi, - EmbedderSource::Rest, - EmbedderSource::UserProvided, - ], - _other => 
unreachable!("unknown field"), + fn check_setting( + embedder_name: &str, + source: EmbedderSource, + field: MetaEmbeddingSetting, + context: NestingContext, + setting: &Setting, + ) -> Result<(), UserError> { + match (Self::field_status(source, field, context), setting) { + (FieldStatus::Mandatory, Setting::Set(_)) + | (FieldStatus::Allowed, _) + | (FieldStatus::Disallowed, Setting::NotSet) => Ok(()), + (FieldStatus::Disallowed, _) => Err(UserError::InvalidFieldForSource { + embedder_name: context.embedder_name_with_context(embedder_name), + source_: source, + context, + field, + }), + (FieldStatus::Mandatory, _) => Err(UserError::MissingFieldForSource { + field: field.name(), + source_: source, + embedder_name: embedder_name.to_owned(), + }), } } - pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { - match source { - EmbedderSource::OpenAi => &[ - Self::SOURCE, - Self::MODEL, - Self::API_KEY, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::DIMENSIONS, - Self::DISTRIBUTION, - Self::URL, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::HuggingFace => &[ - Self::SOURCE, - Self::MODEL, - Self::REVISION, - Self::POOLING, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::Ollama => &[ - Self::SOURCE, - Self::MODEL, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::URL, - Self::API_KEY, - Self::DIMENSIONS, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::UserProvided => { - &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION, Self::BINARY_QUANTIZED] + pub(crate) fn field_status( + source: EmbedderSource, + field: MetaEmbeddingSetting, + context: NestingContext, + ) -> FieldStatus { + use EmbedderSource::*; + use MetaEmbeddingSetting::*; + use NestingContext::*; + match (source, field, context) { + (_, Distribution | BinaryQuantized, NotNested) => FieldStatus::Allowed, + (_, 
Distribution | BinaryQuantized, _) => FieldStatus::Disallowed, + (_, DocumentTemplate | DocumentTemplateMaxBytes, Search) => FieldStatus::Disallowed, + ( + OpenAi, + Source + | Model + | ApiKey + | DocumentTemplate + | DocumentTemplateMaxBytes + | Dimensions + | Url, + _, + ) => FieldStatus::Allowed, + ( + OpenAi, + Revision | Pooling | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + ( + HuggingFace, + Source | Model | Revision | Pooling | DocumentTemplate | DocumentTemplateMaxBytes, + _, + ) => FieldStatus::Allowed, + ( + HuggingFace, + ApiKey | Dimensions | Url | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (Ollama, Model, _) => FieldStatus::Mandatory, + ( + Ollama, + Source | DocumentTemplate | DocumentTemplateMaxBytes | Url | ApiKey | Dimensions, + _, + ) => FieldStatus::Allowed, + ( + Ollama, + Revision | Pooling | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (UserProvided, Dimensions, _) => FieldStatus::Mandatory, + (UserProvided, Source, _) => FieldStatus::Allowed, + ( + UserProvided, + Model + | Revision + | Pooling + | ApiKey + | DocumentTemplate + | DocumentTemplateMaxBytes + | Url + | Request + | Response + | Headers + | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (Rest, Url | Request | Response, _) => FieldStatus::Mandatory, + ( + Rest, + Source + | ApiKey + | Dimensions + | DocumentTemplate + | DocumentTemplateMaxBytes + | Headers, + _, + ) => FieldStatus::Allowed, + (Rest, Model | Revision | Pooling | SearchEmbedder | IndexingEmbedder, _) => { + FieldStatus::Disallowed } - EmbedderSource::Rest => &[ - Self::SOURCE, - Self::API_KEY, - Self::DIMENSIONS, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::URL, - Self::REQUEST, - Self::RESPONSE, - Self::HEADERS, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], + 
(Composite, SearchEmbedder | IndexingEmbedder, _) => FieldStatus::Mandatory, + (Composite, Source, _) => FieldStatus::Allowed, + ( + Composite, + Model + | Revision + | Pooling + | ApiKey + | Dimensions + | DocumentTemplate + | DocumentTemplateMaxBytes + | Url + | Request + | Response + | Headers, + _, + ) => FieldStatus::Disallowed, } } @@ -781,9 +1435,45 @@ impl EmbeddingSettings { *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) } } + + pub(crate) fn check_nested_source( + embedder_name: &str, + source: EmbedderSource, + context: NestingContext, + ) -> Result<(), UserError> { + match (context, source) { + (NestingContext::NotNested, _) => Ok(()), + ( + NestingContext::Search | NestingContext::Indexing, + EmbedderSource::Composite | EmbedderSource::UserProvided, + ) => Err(UserError::InvalidSourceForNested { + embedder_name: context.embedder_name_with_context(embedder_name), + source_: source, + }), + ( + NestingContext::Search | NestingContext::Indexing, + EmbedderSource::OpenAi + | EmbedderSource::HuggingFace + | EmbedderSource::Ollama + | EmbedderSource::Rest, + ) => Ok(()), + } + } } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[derive( + Debug, + Clone, + Copy, + Default, + Serialize, + Deserialize, + PartialEq, + Eq, + Deserr, + ToSchema, + enum_iterator::Sequence, +)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum EmbedderSource { @@ -793,6 +1483,7 @@ pub enum EmbedderSource { Ollama, UserProvided, Rest, + Composite, } impl std::fmt::Display for EmbedderSource { @@ -803,125 +1494,311 @@ impl std::fmt::Display for EmbedderSource { EmbedderSource::UserProvided => "userProvided", EmbedderSource::Ollama => "ollama", EmbedderSource::Rest => "rest", + EmbedderSource::Composite => "composite", }; f.write_str(s) } } +impl EmbeddingSettings { + fn from_hugging_face( + super::hf::EmbedderOptions { + model, + 
revision, + distribution, + pooling, + }: super::hf::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::HuggingFace), + model: Setting::Set(model), + revision: Setting::some_or_not_set(revision), + pooling: Setting::Set(pooling), + api_key: Setting::NotSet, + dimensions: Setting::NotSet, + document_template, + document_template_max_bytes, + url: Setting::NotSet, + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_openai( + super::openai::EmbedderOptions { + url, + api_key, + embedding_model, + dimensions, + distribution, + }: super::openai::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::OpenAi), + model: Setting::Set(embedding_model.name().to_owned()), + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::some_or_not_set(url), + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_ollama( + super::ollama::EmbedderOptions { + embedding_model, + url, + api_key, + distribution, + dimensions, + }: super::ollama::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::Ollama), + model: 
Setting::Set(embedding_model), + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::some_or_not_set(url), + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_user_provided( + super::manual::EmbedderOptions { dimensions, distribution }: super::manual::EmbedderOptions, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::UserProvided), + model: Setting::NotSet, + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::NotSet, + dimensions: Setting::Set(dimensions), + document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, + url: Setting::NotSet, + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_rest( + super::rest::EmbedderOptions { + api_key, + dimensions, + url, + request, + response, + distribution, + headers, + }: super::rest::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::Rest), + model: Setting::NotSet, + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::Set(url), + request: Setting::Set(request), + response: Setting::Set(response), + distribution: 
Setting::some_or_not_set(distribution), + headers: Setting::Set(headers), + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + binary_quantized: Setting::some_or_not_set(quantized), + } + } +} + impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { let EmbeddingConfig { embedder_options, prompt, quantized } = value; let document_template_max_bytes = Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { - super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { - model, - revision, - distribution, - pooling, - }) => Self { - source: Setting::Set(EmbedderSource::HuggingFace), - model: Setting::Set(model), - revision: Setting::some_or_not_set(revision), - pooling: Setting::Set(pooling), - api_key: Setting::NotSet, - dimensions: Setting::NotSet, - document_template: Setting::Set(prompt.template), + super::EmbedderOptions::HuggingFace(options) => Self::from_hugging_face( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::NotSet, - request: Setting::NotSet, - response: Setting::NotSet, - headers: Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { - url, - api_key, - embedding_model, - dimensions, - distribution, - }) => Self { - source: Setting::Set(EmbedderSource::OpenAi), - model: Setting::Set(embedding_model.name().to_owned()), - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + quantized, + ), + super::EmbedderOptions::OpenAi(options) => Self::from_openai( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::some_or_not_set(url), - request: Setting::NotSet, - response: Setting::NotSet, - headers: 
Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { - embedding_model, - url, - api_key, - distribution, - dimensions, - }) => Self { - source: Setting::Set(EmbedderSource::Ollama), - model: Setting::Set(embedding_model), - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + quantized, + ), + super::EmbedderOptions::Ollama(options) => Self::from_ollama( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::some_or_not_set(url), - request: Setting::NotSet, - response: Setting::NotSet, - headers: Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { - dimensions, - distribution, + quantized, + ), + super::EmbedderOptions::UserProvided(options) => { + Self::from_user_provided(options, quantized) + } + super::EmbedderOptions::Rest(options) => Self::from_rest( + options, + Setting::Set(prompt.template), + document_template_max_bytes, + quantized, + ), + super::EmbedderOptions::Composite(super::composite::EmbedderOptions { + search, + index, }) => Self { - source: Setting::Set(EmbedderSource::UserProvided), + source: Setting::Set(EmbedderSource::Composite), model: Setting::NotSet, revision: Setting::NotSet, pooling: Setting::NotSet, api_key: Setting::NotSet, - dimensions: Setting::Set(dimensions), + dimensions: Setting::NotSet, + binary_quantized: Setting::some_or_not_set(quantized), document_template: Setting::NotSet, document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: 
Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), + distribution: Setting::some_or_not_set(search.distribution()), + search_embedder: Setting::Set(SubEmbeddingSettings::from_options( + search, + Setting::NotSet, + Setting::NotSet, + )), + indexing_embedder: Setting::Set(SubEmbeddingSettings::from_options( + index, + Setting::Set(prompt.template), + document_template_max_bytes, + )), }, - super::EmbedderOptions::Rest(super::rest::EmbedderOptions { - api_key, - dimensions, - url, - request, - response, - distribution, - headers, - }) => Self { - source: Setting::Set(EmbedderSource::Rest), - model: Setting::NotSet, - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + } + } +} + +impl SubEmbeddingSettings { + fn from_options( + options: SubEmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + ) -> Self { + let settings = match options { + SubEmbedderOptions::HuggingFace(embedder_options) => { + EmbeddingSettings::from_hugging_face( + embedder_options, + document_template, + document_template_max_bytes, + None, + ) + } + SubEmbedderOptions::OpenAi(embedder_options) => EmbeddingSettings::from_openai( + embedder_options, + document_template, document_template_max_bytes, - url: Setting::Set(url), - request: Setting::Set(request), - response: Setting::Set(response), - distribution: Setting::some_or_not_set(distribution), - headers: Setting::Set(headers), - binary_quantized: Setting::some_or_not_set(quantized), - }, + None, + ), + SubEmbedderOptions::Ollama(embedder_options) => EmbeddingSettings::from_ollama( + embedder_options, + document_template, + document_template_max_bytes, + None, + ), + SubEmbedderOptions::UserProvided(embedder_options) => { + EmbeddingSettings::from_user_provided(embedder_options, None) + } + 
SubEmbedderOptions::Rest(embedder_options) => EmbeddingSettings::from_rest( + embedder_options, + document_template, + document_template_max_bytes, + None, + ), + }; + settings.into() + } +} + +impl From for SubEmbeddingSettings { + fn from(value: EmbeddingSettings) -> Self { + let EmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + document_template_max_bytes, + url, + request, + response, + headers, + binary_quantized: _, + search_embedder: _, + indexing_embedder: _, + distribution: _, + } = value; + Self { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + document_template_max_bytes, + url, + request, + response, + headers, + distribution: Setting::NotSet, + binary_quantized: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, } } } @@ -944,88 +1821,26 @@ impl From for EmbeddingConfig { distribution, headers, binary_quantized, + search_embedder, + mut indexing_embedder, } = value; this.quantized = binary_quantized.set(); - - if let Some(source) = source.set() { - match source { - EmbedderSource::OpenAi => { - let mut options = super::openai::EmbedderOptions::with_default_model(None); - if let Some(model) = model.set() { - if let Some(model) = super::openai::EmbeddingModel::from_name(&model) { - options.embedding_model = model; - } - } - if let Some(url) = url.set() { - options.url = Some(url); - } - if let Some(api_key) = api_key.set() { - options.api_key = Some(api_key); - } - if let Some(dimensions) = dimensions.set() { - options.dimensions = Some(dimensions); - } - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::OpenAi(options); - } - EmbedderSource::Ollama => { - let mut options: ollama::EmbedderOptions = - super::ollama::EmbedderOptions::with_default_model( - api_key.set(), - url.set(), - dimensions.set(), - ); - if let Some(model) = model.set() { - options.embedding_model = 
model; - } - - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::Ollama(options); - } - EmbedderSource::HuggingFace => { - let mut options = super::hf::EmbedderOptions::default(); - if let Some(model) = model.set() { - options.model = model; - // Reset the revision if we are setting the model. - // This allows the following: - // "huggingFace": {} -> default model with default revision - // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision - // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision - options.revision = None; - } - if let Some(revision) = revision.set() { - options.revision = Some(revision); - } - if let Some(pooling) = pooling.set() { - options.pooling = pooling; - } - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::HuggingFace(options); - } - EmbedderSource::UserProvided => { - this.embedder_options = - super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { - dimensions: dimensions.set().unwrap(), - distribution: distribution.set(), - }); - } - EmbedderSource::Rest => { - this.embedder_options = - super::EmbedderOptions::Rest(super::rest::EmbedderOptions { - api_key: api_key.set(), - dimensions: dimensions.set(), - url: url.set().unwrap(), - request: request.set().unwrap(), - response: response.set().unwrap(), - distribution: distribution.set(), - headers: headers.set().unwrap_or_default(), - }) - } + if let Some((template, document_template_max_bytes)) = + match (document_template, &mut indexing_embedder) { + (Setting::Set(template), _) => Some((template, document_template_max_bytes)), + // retrieve the prompt from the indexing embedder in case of a composite embedder + ( + _, + Setting::Set(SubEmbeddingSettings { + document_template: Setting::Set(document_template), + document_template_max_bytes, + .. 
+ }), + ) => Some((std::mem::take(document_template), *document_template_max_bytes)), + _ => None, } - } - - if let Setting::Set(template) = document_template { + { let max_bytes = document_template_max_bytes .set() .and_then(NonZeroUsize::new) @@ -1034,6 +1849,208 @@ impl From for EmbeddingConfig { this.prompt = PromptData { template, max_bytes: Some(max_bytes) } } + if let Some(source) = source.set() { + this.embedder_options = match source { + EmbedderSource::OpenAi => { + SubEmbedderOptions::openai(model, url, api_key, dimensions, distribution).into() + } + EmbedderSource::Ollama => { + SubEmbedderOptions::ollama(model, url, api_key, dimensions, distribution).into() + } + EmbedderSource::HuggingFace => { + SubEmbedderOptions::hugging_face(model, revision, pooling, distribution).into() + } + EmbedderSource::UserProvided => { + SubEmbedderOptions::user_provided(dimensions.set().unwrap(), distribution) + .into() + } + EmbedderSource::Rest => SubEmbedderOptions::rest( + url.set().unwrap(), + api_key, + request.set().unwrap(), + response.set().unwrap(), + headers, + dimensions, + distribution, + ) + .into(), + EmbedderSource::Composite => { + super::EmbedderOptions::Composite(super::composite::EmbedderOptions { + // it is important to give the distribution to the search here, as this is from where we'll retrieve it + search: SubEmbedderOptions::from_settings( + search_embedder.set().unwrap(), + distribution, + ), + index: SubEmbedderOptions::from_settings( + indexing_embedder.set().unwrap(), + Setting::NotSet, + ), + }) + } + }; + } + this } } + +impl SubEmbedderOptions { + fn from_settings( + settings: SubEmbeddingSettings, + distribution: Setting, + ) -> Self { + let SubEmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + // retrieved by the EmbeddingConfig + document_template: _, + document_template_max_bytes: _, + url, + request, + response, + headers, + // phony parameters + distribution: _, + binary_quantized: _, + 
search_embedder: _, + indexing_embedder: _, + } = settings; + + match source.set().unwrap() { + EmbedderSource::OpenAi => Self::openai(model, url, api_key, dimensions, distribution), + EmbedderSource::HuggingFace => { + Self::hugging_face(model, revision, pooling, distribution) + } + EmbedderSource::Ollama => Self::ollama(model, url, api_key, dimensions, distribution), + EmbedderSource::UserProvided => { + Self::user_provided(dimensions.set().unwrap(), distribution) + } + EmbedderSource::Rest => Self::rest( + url.set().unwrap(), + api_key, + request.set().unwrap(), + response.set().unwrap(), + headers, + dimensions, + distribution, + ), + EmbedderSource::Composite => panic!("nested composite embedders"), + } + } + + fn openai( + model: Setting, + url: Setting, + api_key: Setting, + dimensions: Setting, + distribution: Setting, + ) -> Self { + let mut options = super::openai::EmbedderOptions::with_default_model(None); + if let Some(model) = model.set() { + if let Some(model) = super::openai::EmbeddingModel::from_name(&model) { + options.embedding_model = model; + } + } + if let Some(url) = url.set() { + options.url = Some(url); + } + if let Some(api_key) = api_key.set() { + options.api_key = Some(api_key); + } + if let Some(dimensions) = dimensions.set() { + options.dimensions = Some(dimensions); + } + options.distribution = distribution.set(); + SubEmbedderOptions::OpenAi(options) + } + fn hugging_face( + model: Setting, + revision: Setting, + pooling: Setting, + distribution: Setting, + ) -> Self { + let mut options = super::hf::EmbedderOptions::default(); + if let Some(model) = model.set() { + options.model = model; + // Reset the revision if we are setting the model. 
+ // This allows the following: + // "huggingFace": {} -> default model with default revision + // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision + // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision + options.revision = None; + } + if let Some(revision) = revision.set() { + options.revision = Some(revision); + } + if let Some(pooling) = pooling.set() { + options.pooling = pooling; + } + options.distribution = distribution.set(); + SubEmbedderOptions::HuggingFace(options) + } + fn user_provided(dimensions: usize, distribution: Setting) -> Self { + Self::UserProvided(super::manual::EmbedderOptions { + dimensions, + distribution: distribution.set(), + }) + } + fn rest( + url: String, + api_key: Setting, + request: serde_json::Value, + response: serde_json::Value, + headers: Setting>, + dimensions: Setting, + distribution: Setting, + ) -> Self { + Self::Rest(super::rest::EmbedderOptions { + api_key: api_key.set(), + dimensions: dimensions.set(), + url, + request, + response, + distribution: distribution.set(), + headers: headers.set().unwrap_or_default(), + }) + } + fn ollama( + model: Setting, + url: Setting, + api_key: Setting, + dimensions: Setting, + distribution: Setting, + ) -> Self { + let mut options: ollama::EmbedderOptions = + super::ollama::EmbedderOptions::with_default_model( + api_key.set(), + url.set(), + dimensions.set(), + ); + if let Some(model) = model.set() { + options.embedding_model = model; + } + + options.distribution = distribution.set(); + SubEmbedderOptions::Ollama(options) + } +} + +impl From for EmbedderOptions { + fn from(value: SubEmbedderOptions) -> Self { + match value { + SubEmbedderOptions::HuggingFace(embedder_options) => { + Self::HuggingFace(embedder_options) + } + SubEmbedderOptions::OpenAi(embedder_options) => Self::OpenAi(embedder_options), + SubEmbedderOptions::Ollama(embedder_options) => Self::Ollama(embedder_options), + 
SubEmbedderOptions::UserProvided(embedder_options) => { + Self::UserProvided(embedder_options) + } + SubEmbedderOptions::Rest(embedder_options) => Self::Rest(embedder_options), + } + } +} From e374b095a2d4b7f2002855073b841b1c680ce698 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 13:57:25 +0100 Subject: [PATCH 09/16] Fix tests --- .../after_registering_settings_task.snap | 2 +- .../test_settings_update/settings_update_processed.snap | 2 +- .../import_vectors/Intel to kefir succeeds.snap | 2 +- .../test_embedders.rs/import_vectors/Intel to kefir.snap | 2 +- .../import_vectors/adding Intel succeeds.snap | 2 +- .../import_vectors/after adding Intel.snap | 2 +- .../after_registering_settings_task_vectors.snap | 2 +- .../import_vectors/settings_update_processed_vectors.snap | 2 +- crates/meilisearch/tests/vector/rest.rs | 4 ++-- crates/meilisearch/tests/vector/settings.rs | 6 +++--- crates/milli/src/update/index_documents/mod.rs | 2 ++ 11 files changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap index fb2a9de43..d9d8b0724 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: 
NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 
---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap index f503e2a56..ca8a3e137 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { 
displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap index fcbab1a07..74cdb9bc1 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, 
document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, 
api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, 
_kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap index d6a677999..16858361e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: 
NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, 
dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: 
PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, 
details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap index 2dc23b3b4..8daa10244 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: 
Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, 
is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: 
NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap index 818cdd474..87a9ec11c 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: 
NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { 
source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap index 172f80633..35bd9dee9 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: 
Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 
---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap index 1635614e8..40e8f63e9 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: 
Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: 
WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), 
model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs index bf6876fbe..82fc71b26 100644 --- a/crates/meilisearch/tests/vector/rest.rs +++ b/crates/meilisearch/tests/vector/rest.rs @@ -916,7 +916,7 @@ async fn bad_settings() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source rest)", + "message": "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source `rest`)", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" @@ -933,7 +933,7 @@ async fn bad_settings() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": 
"`.embedders.rest`: Missing field `response` (note: this field is mandatory for source rest)", + "message": "`.embedders.rest`: Missing field `response` (note: this field is mandatory for source `rest`)", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index 97fa496b4..88c670fb3 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -11,13 +11,13 @@ async fn field_unavailable_for_source() { let (response, code) = index .update_settings(json!({ - "embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}}, + "embedders": { "manual": {"source": "userProvided", "dimensions": 128, "documentTemplate": "{{doc.documentTemplate}}"}}, })) .await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided`.\n - note: `documentTemplate` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" @@ -32,7 +32,7 @@ async fn field_unavailable_for_source() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). 
Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `documentTemplateMaxBytes`, `dimensions`, `distribution`, `url`, `binaryQuantized`", + "message": "`.embedders.default`: Field `revision` unavailable for source `openAi`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index d62128eaa..61153c8c0 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -2773,6 +2773,8 @@ mod tests { response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, binary_quantized: Setting::NotSet, }), ); From 24fe6cd2059e4674597a744cf80afac8d9ac5dcf Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 24 Feb 2025 16:24:04 +0100 Subject: [PATCH 10/16] Fix multiple embeddings in hf --- crates/milli/src/vector/hf.rs | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/crates/milli/src/vector/hf.rs b/crates/milli/src/vector/hf.rs index 3ec0a5b7c..60e40e367 100644 --- a/crates/milli/src/vector/hf.rs +++ b/crates/milli/src/vector/hf.rs @@ -255,34 +255,8 @@ impl Embedder { Ok(this) } - pub fn embed(&self, mut texts: Vec<String>) -> std::result::Result<Vec<Embedding>, EmbedError> { - let tokens = match texts.len() { - 1 => vec![self - .tokenizer - .encode(texts.pop().unwrap(), true) - .map_err(EmbedError::tokenize)?], - _ => self.tokenizer.encode_batch(texts, true).map_err(EmbedError::tokenize)?, - }; - let token_ids = tokens - .iter() - .map(|tokens| { - let mut
tokens = tokens.get_ids().to_vec(); - tokens.truncate(512); - Tensor::new(tokens.as_slice(), &self.model.device).map_err(EmbedError::tensor_shape) - }) - .collect::<Result<Vec<_>, EmbedError>>()?; - - let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?; - let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; - let embeddings = self - .model - .forward(&token_ids, &token_type_ids, None) - .map_err(EmbedError::model_forward)?; - - let embeddings = Self::pooling(embeddings, self.pooling)?; - - let embeddings: Vec<Embedding> = embeddings.to_vec2().map_err(EmbedError::tensor_shape)?; - Ok(embeddings) + pub fn embed(&self, texts: Vec<String>) -> std::result::Result<Vec<Embedding>, EmbedError> { + texts.into_iter().map(|text| self.embed_one(&text)).collect() } fn pooling(embeddings: Tensor, pooling: Pooling) -> Result<Tensor, EmbedError> { From 3b2cd54b9db07fa0a4051ce2b4004f16239d6275 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 25 Feb 2025 17:24:45 +0100 Subject: [PATCH 11/16] tests: add a check to know if a Value has an uid --- crates/meilisearch/tests/common/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index 52aa3b32d..4d57a6163 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -34,6 +34,10 @@ impl Value { } } + pub fn has_uid(&self) -> bool { + self["uid"].as_u64().is_some() || self["taskUid"].as_u64().is_some() + } + /// Return `true` if the `status` field is set to `succeeded`. /// Panic if the `status` field doesn't exists.
#[track_caller] From b190b612a3532d1c78bee921478458af0b8c4db5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 25 Feb 2025 17:42:00 +0100 Subject: [PATCH 12/16] Add test on all parameters --- crates/meilisearch/tests/settings/mod.rs | 1 + .../huggingFace-apiKey-sending_code.snap | 4 + .../huggingFace-apiKey-sending_result.snap | 9 + ...gingFace-binaryQuantized-sending_code.snap | 4 + ...ngFace-binaryQuantized-sending_result.snap | 10 + ...ggingFace-binaryQuantized-task_result.snap | 24 ++ .../huggingFace-dimensions-sending_code.snap | 4 + ...huggingFace-dimensions-sending_result.snap | 9 + .../huggingFace-model-sending_code.snap | 4 + .../huggingFace-model-sending_result.snap | 10 + .../huggingFace-model-task_result.snap | 24 ++ .../huggingFace-pooling-sending_code.snap | 4 + .../huggingFace-pooling-sending_result.snap | 10 + .../huggingFace-pooling-task_result.snap | 24 ++ .../huggingFace-revision-sending_code.snap | 4 + .../huggingFace-revision-sending_result.snap | 10 + .../huggingFace-revision-task_result.snap | 29 ++ .../ollama-apiKey-sending_code.snap | 4 + .../ollama-apiKey-sending_result.snap | 10 + .../ollama-apiKey-task_result.snap | 26 ++ .../ollama-binaryQuantized-sending_code.snap | 4 + ...ollama-binaryQuantized-sending_result.snap | 10 + .../ollama-binaryQuantized-task_result.snap | 26 ++ .../ollama-dimensions-sending_code.snap | 4 + .../ollama-dimensions-sending_result.snap | 10 + .../ollama-dimensions-task_result.snap | 25 ++ .../ollama-model-sending_code.snap | 4 + .../ollama-model-sending_result.snap | 10 + .../ollama-model-task_result.snap | 25 ++ .../ollama-pooling-sending_code.snap | 4 + .../ollama-pooling-sending_result.snap | 9 + .../ollama-revision-sending_code.snap | 4 + .../ollama-revision-sending_result.snap | 9 + .../openAi-apiKey-sending_code.snap | 4 + .../openAi-apiKey-sending_result.snap | 10 + .../openAi-apiKey-task_result.snap | 24 ++ .../openAi-binaryQuantized-sending_code.snap | 4 + 
...openAi-binaryQuantized-sending_result.snap | 10 + .../openAi-binaryQuantized-task_result.snap | 24 ++ .../openAi-dimensions-sending_code.snap | 4 + .../openAi-dimensions-sending_result.snap | 10 + .../openAi-dimensions-task_result.snap | 24 ++ .../openAi-model-sending_code.snap | 4 + .../openAi-model-sending_result.snap | 10 + .../openAi-model-task_result.snap | 24 ++ .../openAi-pooling-sending_code.snap | 4 + .../openAi-pooling-sending_result.snap | 9 + .../openAi-revision-sending_code.snap | 4 + .../openAi-revision-sending_result.snap | 9 + .../rest-apiKey-sending_code.snap | 4 + .../rest-apiKey-sending_result.snap | 10 + .../rest-apiKey-task_result.snap | 32 +++ .../rest-binaryQuantized-sending_code.snap | 4 + .../rest-binaryQuantized-sending_result.snap | 10 + .../rest-binaryQuantized-task_result.snap | 32 +++ .../rest-dimensions-sending_code.snap | 4 + .../rest-dimensions-sending_result.snap | 10 + .../rest-dimensions-task_result.snap | 31 ++ .../rest-model-sending_code.snap | 4 + .../rest-model-sending_result.snap | 9 + .../rest-pooling-sending_code.snap | 4 + .../rest-pooling-sending_result.snap | 9 + .../rest-revision-sending_code.snap | 4 + .../rest-revision-sending_result.snap | 9 + .../userProvided-apiKey-sending_code.snap | 4 + .../userProvided-apiKey-sending_result.snap | 9 + ...Provided-binaryQuantized-sending_code.snap | 4 + ...ovided-binaryQuantized-sending_result.snap | 10 + ...rProvided-binaryQuantized-task_result.snap | 25 ++ .../userProvided-dimensions-sending_code.snap | 4 + ...serProvided-dimensions-sending_result.snap | 10 + .../userProvided-dimensions-task_result.snap | 24 ++ .../userProvided-model-sending_code.snap | 4 + .../userProvided-model-sending_result.snap | 9 + .../userProvided-pooling-sending_code.snap | 4 + .../userProvided-pooling-sending_result.snap | 9 + .../userProvided-revision-sending_code.snap | 4 + .../userProvided-revision-sending_result.snap | 9 + ...huggingFace-distribution-sending_code.snap | 4 + 
...ggingFace-distribution-sending_result.snap | 10 + .../huggingFace-distribution-task_result.snap | 27 ++ ...ingFace-documentTemplate-sending_code.snap | 4 + ...gFace-documentTemplate-sending_result.snap | 10 + ...gingFace-documentTemplate-task_result.snap | 24 ++ ...documentTemplateMaxBytes-sending_code.snap | 4 + ...cumentTemplateMaxBytes-sending_result.snap | 10 + ...-documentTemplateMaxBytes-task_result.snap | 24 ++ .../huggingFace-headers-sending_code.snap | 4 + .../huggingFace-headers-sending_result.snap | 9 + .../huggingFace-request-sending_code.snap | 4 + .../huggingFace-request-sending_result.snap | 9 + .../huggingFace-response-sending_code.snap | 4 + .../huggingFace-response-sending_result.snap | 9 + .../huggingFace-url-sending_code.snap | 4 + .../huggingFace-url-sending_result.snap | 9 + .../ollama-distribution-sending_code.snap | 4 + .../ollama-distribution-sending_result.snap | 10 + .../ollama-distribution-task_result.snap | 29 ++ .../ollama-documentTemplate-sending_code.snap | 4 + ...llama-documentTemplate-sending_result.snap | 10 + .../ollama-documentTemplate-task_result.snap | 26 ++ ...documentTemplateMaxBytes-sending_code.snap | 4 + ...cumentTemplateMaxBytes-sending_result.snap | 10 + ...-documentTemplateMaxBytes-task_result.snap | 26 ++ .../ollama-headers-sending_code.snap | 4 + .../ollama-headers-sending_result.snap | 9 + .../ollama-request-sending_code.snap | 4 + .../ollama-request-sending_result.snap | 9 + .../ollama-response-sending_code.snap | 4 + .../ollama-response-sending_result.snap | 9 + .../ollama-url-sending_code.snap | 4 + .../ollama-url-sending_result.snap | 10 + .../ollama-url-task_result.snap | 31 ++ .../openAi-distribution-sending_code.snap | 4 + .../openAi-distribution-sending_result.snap | 10 + .../openAi-distribution-task_result.snap | 27 ++ .../openAi-documentTemplate-sending_code.snap | 4 + ...penAi-documentTemplate-sending_result.snap | 10 + .../openAi-documentTemplate-task_result.snap | 24 ++ 
...documentTemplateMaxBytes-sending_code.snap | 4 + ...cumentTemplateMaxBytes-sending_result.snap | 10 + ...-documentTemplateMaxBytes-task_result.snap | 24 ++ .../openAi-headers-sending_code.snap | 4 + .../openAi-headers-sending_result.snap | 9 + .../openAi-request-sending_code.snap | 4 + .../openAi-request-sending_result.snap | 9 + .../openAi-response-sending_code.snap | 4 + .../openAi-response-sending_result.snap | 9 + .../openAi-url-sending_code.snap | 4 + .../openAi-url-sending_result.snap | 10 + .../openAi-url-task_result.snap | 24 ++ .../rest-distribution-sending_code.snap | 4 + .../rest-distribution-sending_result.snap | 10 + .../rest-distribution-task_result.snap | 35 +++ .../rest-documentTemplate-sending_code.snap | 4 + .../rest-documentTemplate-sending_result.snap | 10 + .../rest-documentTemplate-task_result.snap | 32 +++ ...documentTemplateMaxBytes-sending_code.snap | 4 + ...cumentTemplateMaxBytes-sending_result.snap | 10 + ...-documentTemplateMaxBytes-task_result.snap | 32 +++ .../rest-headers-sending_code.snap | 4 + .../rest-headers-sending_result.snap | 10 + .../rest-headers-task_result.snap | 34 +++ .../rest-request-sending_code.snap | 4 + .../rest-request-sending_result.snap | 10 + .../rest-request-task_result.snap | 31 ++ .../rest-response-sending_code.snap | 4 + .../rest-response-sending_result.snap | 10 + .../rest-response-task_result.snap | 31 ++ .../rest-url-sending_code.snap | 4 + .../rest-url-sending_result.snap | 10 + .../rest-url-task_result.snap | 31 ++ ...serProvided-distribution-sending_code.snap | 4 + ...rProvided-distribution-sending_result.snap | 10 + ...userProvided-distribution-task_result.snap | 28 ++ ...rovided-documentTemplate-sending_code.snap | 4 + ...vided-documentTemplate-sending_result.snap | 9 + ...documentTemplateMaxBytes-sending_code.snap | 4 + ...cumentTemplateMaxBytes-sending_result.snap | 9 + .../userProvided-headers-sending_code.snap | 4 + .../userProvided-headers-sending_result.snap | 9 + 
.../userProvided-request-sending_code.snap | 4 + .../userProvided-request-sending_result.snap | 9 + .../userProvided-response-sending_code.snap | 4 + .../userProvided-response-sending_result.snap | 9 + .../userProvided-url-sending_code.snap | 4 + .../userProvided-url-sending_result.snap | 9 + crates/meilisearch/tests/settings/vectors.rs | 269 ++++++++++++++++++ 168 files changed, 2134 insertions(+) create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap create mode 
100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap create mode 100644 
crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap create mode 
100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap create mode 100644 crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap create mode 100644 crates/meilisearch/tests/settings/vectors.rs diff --git a/crates/meilisearch/tests/settings/mod.rs b/crates/meilisearch/tests/settings/mod.rs index 67df4068a..6b61e6be0 100644 --- a/crates/meilisearch/tests/settings/mod.rs +++ b/crates/meilisearch/tests/settings/mod.rs @@ -4,3 +4,4 @@ mod get_settings; mod prefix_search_settings; mod proximity_settings; mod tokenizer_customization; +mod vectors; diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap new file mode 100644 index 000000000..3a9b5bfb8 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `apiKey` unavailable for source `huggingFace`.\n - note: `apiKey` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + 
"code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap new file mode 100644 index 000000000..8f0a4edfa --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + 
"embedders": { + "test": { + "source": "huggingFace", + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap new file mode 100644 index 000000000..f5dc3b48f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `dimensions` unavailable for source `huggingFace`.\n - note: `dimensions` is available for sources: `openAi`, `ollama`, `userProvided`, `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap new file 
mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap new file mode 100644 index 000000000..757a7b89f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap new file mode 100644 index 000000000..12d199767 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "pooling": "forceMean" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap new file mode 100644 index 000000000..78d4c44cc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap @@ -0,0 +1,29 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: error: fetching file 
from HG_HUB failed:\n - request error: https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/config.json: status code 404", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap new file mode 100644 index 000000000..ac3780eb1 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + 
"uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "apiKey": "XXX...", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap new file mode 100644 index 000000000..b9ae269bb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap @@ -0,0 
+1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap new file mode 100644 index 000000000..aef2ba2b0 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap new file mode 100644 index 000000000..aef2ba2b0 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap new file mode 100644 index 000000000..110555f8b --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source `ollama`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": 
"https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap new file mode 100644 index 000000000..a220caa82 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `ollama`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted 
diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap new file mode 100644 index 000000000..0cca31fb7 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "apiKey": "XXX..." 
+ } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap new file mode 100644 index 000000000..329e88cac --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": 
"openAi", + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap new file mode 100644 index 000000000..b63a458ca --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": 
"openAi", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap new file mode 100644 index 000000000..daa87d395 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "model": "text-embedding-3-small" + } + } + }, 
+ "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap new file mode 100644 index 000000000..958b5184a --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source `openAi`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- 
+source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap new file mode 100644 index 000000000..acfdeac87 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `openAi`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": 
"[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap new file mode 100644 index 000000000..ed8a6b2ea --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "apiKey": "XXX...", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap new file mode 100644 index 000000000..12fd314f5 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "binaryQuantized": false, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap new file mode 100644 index 000000000..8ac20a01c --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `model` unavailable for source `rest`.\n - note: `model` is available for sources: `openAi`, `huggingFace`, `ollama`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap new file mode 100644 index 000000000..31a2a7d15 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source 
`rest`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap new file mode 100644 index 000000000..d732ac50c --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `rest`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap new file mode 100644 index 000000000..e47bd1e7f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `apiKey` unavailable for source `userProvided`.\n - note: `apiKey` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap new file mode 100644 index 000000000..93102fbe5 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "userProvided", + "dimensions": 768, + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap new file mode 100644 index 000000000..e095014fd --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "userProvided", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs 
+--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap new file mode 100644 index 000000000..acb26f215 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `model` unavailable for source `userProvided`.\n - note: `model` is available for sources: `openAi`, `huggingFace`, `ollama`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap new file mode 100644 index 000000000..466826779 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` 
unavailable for source `userProvided`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap new file mode 100644 index 000000000..821d9550d --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `userProvided`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap new file mode 100644 index 000000000..0c60b1c6e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap @@ -0,0 +1,27 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap new file mode 100644 index 000000000..b7f10fd11 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": 
"[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..93401b927 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + 
"type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap new file mode 100644 index 000000000..38f95e6cb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `huggingFace`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap new file mode 100644 index 000000000..83fc14a3f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `huggingFace`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap new file mode 100644 index 000000000..f4e2f4a6f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `huggingFace`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap new file mode 100644 index 000000000..3f18f89bd --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `url` unavailable for source `huggingFace`.\n - note: `url` is available for sources: `openAi`, `ollama`, `rest`\n - note: 
available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap new file mode 100644 index 000000000..5b0056604 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap @@ -0,0 +1,29 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", 
+ "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap new file mode 100644 index 000000000..1b42db77b --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..a2e8024a6 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap new file mode 100644 index 000000000..600e8271d --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `ollama`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source 
`ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap new file mode 100644 index 000000000..b257b474e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `ollama`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap new file mode 100644 index 
000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap new file mode 100644 index 000000000..de06524f1 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `ollama`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap new file mode 100644 index 000000000..4eaf0ba2f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "url": "http://rest.example/" + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: unsupported Ollama URL.\n - For `ollama` sources, the URL must end with `/api/embed` or `/api/embeddings`\n - Got `http://rest.example/`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap @@ -0,0 
+1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap new file mode 100644 index 000000000..eb6eaf59d --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap @@ -0,0 +1,27 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap new file mode 100644 index 000000000..d1ad94953 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..dca04b8c2 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": 
"[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap new file mode 100644 index 000000000..117268660 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `openAi`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap new file mode 100644 index 000000000..dcf8000eb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `openAi`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap new file mode 100644 index 000000000..d834bc900 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for 
source `openAi`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap new file mode 100644 index 000000000..78d2b853e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": 
"[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "url": "http://rest.example/" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap new file mode 100644 index 000000000..96841efcc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap @@ -0,0 +1,35 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": 
"[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + }, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap new file mode 100644 index 000000000..f9bb045ad --- 
/dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "documentTemplate": "toto", + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..5085ab19e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "documentTemplateMaxBytes": 200, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: 
crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap new file mode 100644 index 000000000..db6434f0e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap @@ -0,0 +1,34 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + }, + "headers": { + "custom": "value" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap @@ -0,0 
+1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap new file mode 100644 index 000000000..be731d19f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap @@ -0,0 +1,28 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "userProvided", + "dimensions": 768, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef5454296 
--- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap new file mode 100644 index 000000000..4922d21cc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `documentTemplate` unavailable for source `userProvided`.\n - note: `documentTemplate` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..1899cc0a8 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `documentTemplateMaxBytes` unavailable for source `userProvided`.\n - note: `documentTemplateMaxBytes` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap new file mode 100644 index 000000000..1cd308942 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `userProvided`.\n - note: 
`headers` is available for sources: `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap new file mode 100644 index 000000000..48f8ca1eb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `userProvided`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap new file mode 100644 index 
000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap new file mode 100644 index 000000000..76c1c8f68 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `userProvided`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap new file mode 100644 index 000000000..7469b3943 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `url` unavailable for source `userProvided`.\n - note: `url` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/vectors.rs b/crates/meilisearch/tests/settings/vectors.rs new file mode 100644 index 000000000..fb7c6dbf9 --- /dev/null +++ b/crates/meilisearch/tests/settings/vectors.rs @@ -0,0 +1,269 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{Server, Value}; + +macro_rules! parameter_test { + ($server:ident, $source:tt, $param:tt) => { + let source = stringify!($source); + let param = stringify!($param); + let index = $server.index("test"); + + let (response, _code) = index + .update_settings(crate::json!({ + "embedders": { + "test": null, + } + })) + .await; + $server.wait_task(response.uid()).await.succeeded(); + + let mut value = base_for_source(source); + value[param] = valid_parameter(source, param).0; + let (response, code) = index + .update_settings(crate::json!({ + "embedders": { + "test": value + } + })) + .await; + snapshot!(code, name: concat!(stringify!($source), "-", stringify!($param), "-sending_code")); + snapshot!(json_string!(response, {".enqueuedAt" => "[enqueuedAt]", ".taskUid" => "[taskUid]"}), name: concat!(stringify!($source), "-", stringify!($param), "-sending_result")); + + if response.has_uid() { + let response = $server.wait_task(response.uid()).await; + snapshot!(json_string!(response, {".enqueuedAt" => "[enqueuedAt]", + ".uid" => "[uid]", ".batchUid" => "[batchUid]", + ".duration" => 
"[duration]", + ".startedAt" => "[startedAt]", + ".finishedAt" => "[finishedAt]"}), name: concat!(stringify!($source), "-", stringify!($param), "-task_result")); + } + + }; +} + +#[actix_rt::test] +async fn bad_parameters() { + let server = Server::new().await; + + // for each source, check which parameters are allowed/disallowed + // model + // - openai + parameter_test!(server, openAi, model); + // - huggingFace + parameter_test!(server, huggingFace, model); + // - userProvided + parameter_test!(server, userProvided, model); + // - ollama + parameter_test!(server, ollama, model); + // - rest + parameter_test!(server, rest, model); + // == + + // revision + // - openai + parameter_test!(server, openAi, revision); + // - huggingFace + parameter_test!(server, huggingFace, revision); + // - userProvided + parameter_test!(server, userProvided, revision); + // - ollama + parameter_test!(server, ollama, revision); + // - rest + parameter_test!(server, rest, revision); + // == + + // pooling + // - openai + parameter_test!(server, openAi, pooling); + // - huggingFace + parameter_test!(server, huggingFace, pooling); + // - userProvided + parameter_test!(server, userProvided, pooling); + // - ollama + parameter_test!(server, ollama, pooling); + // - rest + parameter_test!(server, rest, pooling); + // == + + // apiKey + // - openai + parameter_test!(server, openAi, apiKey); + // - huggingFace + parameter_test!(server, huggingFace, apiKey); + // - userProvided + parameter_test!(server, userProvided, apiKey); + // - ollama + parameter_test!(server, ollama, apiKey); + // - rest + parameter_test!(server, rest, apiKey); + // == + + // dimensions + // - openai + parameter_test!(server, openAi, dimensions); + // - huggingFace + parameter_test!(server, huggingFace, dimensions); + // - userProvided + parameter_test!(server, userProvided, dimensions); + // - ollama + parameter_test!(server, ollama, dimensions); + // - rest + parameter_test!(server, rest, dimensions); + // == + + // 
binaryQuantized + // - openai + parameter_test!(server, openAi, binaryQuantized); + // - huggingFace + parameter_test!(server, huggingFace, binaryQuantized); + // - userProvided + parameter_test!(server, userProvided, binaryQuantized); + // - ollama + parameter_test!(server, ollama, binaryQuantized); + // - rest + parameter_test!(server, rest, binaryQuantized); + // == + + // TODO: for each source, check that removing mandatory parameters is a failure +} + +#[actix_rt::test] +async fn bad_parameters_2() { + let server = Server::new().await; + + // documentTemplate + // - openai + parameter_test!(server, openAi, documentTemplate); + // - huggingFace + parameter_test!(server, huggingFace, documentTemplate); + // - userProvided + parameter_test!(server, userProvided, documentTemplate); + // - ollama + parameter_test!(server, ollama, documentTemplate); + // - rest + parameter_test!(server, rest, documentTemplate); + // == + + // documentTemplateMaxBytes + // - openai + parameter_test!(server, openAi, documentTemplateMaxBytes); + // - huggingFace + parameter_test!(server, huggingFace, documentTemplateMaxBytes); + // - userProvided + parameter_test!(server, userProvided, documentTemplateMaxBytes); + // - ollama + parameter_test!(server, ollama, documentTemplateMaxBytes); + // - rest + parameter_test!(server, rest, documentTemplateMaxBytes); + // == + + // url + // - openai + parameter_test!(server, openAi, url); + // - huggingFace + parameter_test!(server, huggingFace, url); + // - userProvided + parameter_test!(server, userProvided, url); + // - ollama + parameter_test!(server, ollama, url); + // - rest + parameter_test!(server, rest, url); + // == + + // request + // - openai + parameter_test!(server, openAi, request); + // - huggingFace + parameter_test!(server, huggingFace, request); + // - userProvided + parameter_test!(server, userProvided, request); + // - ollama + parameter_test!(server, ollama, request); + // - rest + parameter_test!(server, rest, request); + // == + 
+ // response + // - openai + parameter_test!(server, openAi, response); + // - huggingFace + parameter_test!(server, huggingFace, response); + // - userProvided + parameter_test!(server, userProvided, response); + // - ollama + parameter_test!(server, ollama, response); + // - rest + parameter_test!(server, rest, response); + // == + + // headers + // - openai + parameter_test!(server, openAi, headers); + // - huggingFace + parameter_test!(server, huggingFace, headers); + // - userProvided + parameter_test!(server, userProvided, headers); + // - ollama + parameter_test!(server, ollama, headers); + // - rest + parameter_test!(server, rest, headers); + // == + + // distribution + // - openai + parameter_test!(server, openAi, distribution); + // - huggingFace + parameter_test!(server, huggingFace, distribution); + // - userProvided + parameter_test!(server, userProvided, distribution); + // - ollama + parameter_test!(server, ollama, distribution); + // - rest + parameter_test!(server, rest, distribution); + // == +} + +fn base_for_source(source: &'static str) -> Value { + let base_parameters = maplit::btreemap! 
{ + "openAi" => vec![], + "huggingFace" => vec![], + "userProvided" => vec!["dimensions"], + "ollama" => vec!["model", + // add dimensions to avoid actually fetching the model from ollama + "dimensions"], + "rest" => vec!["url", "request", "response", + // add dimensions to avoid actually sending a request to the rest embedder + "dimensions"], + }; + + let mut value = crate::json!({ + "source": source + }); + + let mandatory_parameters = base_parameters.get(source).unwrap(); + for mandatory_parameter in mandatory_parameters { + value[mandatory_parameter] = valid_parameter(source, mandatory_parameter).0; + } + value +} + +fn valid_parameter(source: &'static str, parameter: &'static str) -> Value { + match (source, parameter) { + ("openAi", "model") => crate::json!("text-embedding-3-small"), + ("huggingFace", "model") => crate::json!("sentence-transformers/all-MiniLM-L6-v2"), + (_, "model") => crate::json!("all-minilm"), + (_, "revision") => crate::json!("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), + (_, "pooling") => crate::json!("forceMean"), + (_, "apiKey") => crate::json!("foo"), + (_, "dimensions") => crate::json!(768), + (_, "binaryQuantized") => crate::json!(false), + (_, "documentTemplate") => crate::json!("toto"), + (_, "documentTemplateMaxBytes") => crate::json!(200), + (_, "url") => crate::json!("http://rest.example/"), + (_, "request") => crate::json!({"text": "{{text}}"}), + (_, "response") => crate::json!({"embedding": "{{embedding}}"}), + (_, "headers") => crate::json!({"custom": "value"}), + (_, "distribution") => crate::json!({"mean": 0.4, "sigma": 0.1}), + _ => panic!("unknown parameter"), + } +} From 57a6beee3071ee1bac673b59f94bdd2da81d7de7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 26 Feb 2025 15:11:01 +0100 Subject: [PATCH 13/16] Test composite embedders --- crates/meilisearch/tests/vector/settings.rs | 548 ++++++++++++++++++++ 1 file changed, 548 insertions(+) diff --git a/crates/meilisearch/tests/vector/settings.rs 
b/crates/meilisearch/tests/vector/settings.rs index 88c670fb3..9fed808b0 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -407,3 +407,551 @@ async fn ollama_url_checks() { } "###); } + +#[actix_rt::test] +async fn composite_checks() { + let server = Server::new().await; + let index = server.index("test"); + // inner distribution + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "distribution": { + "mean": 0.5, + "sigma": 0.2, + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `distribution` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `distribution` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // manual source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "userProvided", + "dimensions": 
42, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `userProvided` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // composite source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `composite` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + 
"embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": {}, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.indexingEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": {}, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad 
Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `indexingEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `searchEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // inner quantized + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": true, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": false, + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `binaryQuantized` 
unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `binaryQuantized` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // prompt in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "toto", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `documentTemplate` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `documentTemplate` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // dimensions don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + 
"searchEmbedder": { + "source": "ollama", + "dimensions": 0x42, + "model": "does-not-exist", + }, + "indexingEmbedder": { + "source": "ollama", + "dimensions": 42, + "model": "does-not-exist", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "ollama", + "model": "does-not-exist", + "dimensions": 66 + }, + "indexingEmbedder": { + "source": "ollama", + "model": "does-not-exist", + "dimensions": 42 + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: 66\n - Indexing time dimensions: 42\n - Note: Dimensions of embeddings produced by both embedders are required to match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + // pooling don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": 
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance 0.25\n - Meilisearch requires a maximum distance of 0.01.\n - Note: check that both embedders produce similar embeddings.\n - Make sure the `model`, `revision` and `pooling` of both embedders match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // ok + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": 
"huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} From 73d2dbd60ff3bd181787b814b81b70abf911131b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 26 Feb 2025 15:45:19 +0100 Subject: [PATCH 14/16] Error handling --- crates/milli/src/vector/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs index 0993ded1d..685022de8 100644 --- a/crates/milli/src/vector/error.rs +++ b/crates/milli/src/vector/error.rs @@ -497,7 +497,7 @@ pub enum NewEmbedderErrorKind { CompositeTestEmbeddingFailed { inner: EmbedError, failing_embedder: &'static str }, #[error("error while generating test embeddings.\n - the number of generated embeddings differs.\n - {search_count} embeddings for the search time embedder.\n - {index_count} embeddings for the indexing time embedder.")] CompositeEmbeddingCountMismatch { search_count: usize, index_count: usize }, - #[error("error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - 
angular distance {distance}\n - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n - Note: check that both embedders produce similar embeddings.{hint}")] + #[error("error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance {distance:.2}\n - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n - Note: check that both embedders produce similar embeddings.{hint}")] CompositeEmbeddingValueMismatch { distance: f32, hint: CompositeEmbedderContainsHuggingFace }, } From afb4b9677f53f41864d9fcba99428ff47a79a880 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 5 Mar 2025 17:47:12 +0100 Subject: [PATCH 15/16] Remove Embedder:embed --- crates/milli/src/vector/mod.rs | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index a253963d2..f67912b89 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -637,23 +637,7 @@ impl Embedder { }) } - /// Embed one or multiple texts. - /// - /// Each text can be embedded as one or multiple embeddings. 
- fn embed( - &self, - texts: Vec, - deadline: Option, - ) -> std::result::Result, EmbedError> { - match self { - Embedder::HuggingFace(embedder) => embedder.embed(texts), - Embedder::OpenAi(embedder) => embedder.embed(&texts, deadline), - Embedder::Ollama(embedder) => embedder.embed(&texts, deadline), - Embedder::UserProvided(embedder) => embedder.embed(&texts), - Embedder::Rest(embedder) => embedder.embed(texts, deadline), - Embedder::Composite(embedder) => embedder.search.embed(texts, deadline), - } - } + /// Embed in search context #[tracing::instrument(level = "debug", skip_all, target = "search")] pub fn embed_search( @@ -661,7 +645,15 @@ impl Embedder { text: String, deadline: Option, ) -> std::result::Result { - let mut embedding = self.embed(vec![text], deadline)?; + let texts = vec![text]; + let mut embedding = match self { + Embedder::HuggingFace(embedder) => embedder.embed(texts), + Embedder::OpenAi(embedder) => embedder.embed(&texts, deadline), + Embedder::Ollama(embedder) => embedder.embed(&texts, deadline), + Embedder::UserProvided(embedder) => embedder.embed(&texts), + Embedder::Rest(embedder) => embedder.embed(texts, deadline), + Embedder::Composite(embedder) => embedder.search.embed(texts, deadline), + }?; let embedding = embedding.pop().ok_or_else(EmbedError::missing_embedding)?; Ok(embedding) } From 4fab72cbea3eb0867927fd9788ad324d824a7e57 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 5 Mar 2025 17:48:31 +0100 Subject: [PATCH 16/16] Rename SettingsDiff::diff to SettingsDiff::apply_and_diff --- crates/milli/src/vector/settings.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index 610597dd5..3948ad4d8 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -661,7 +661,7 @@ impl SettingsDiff { let mut reindex_action = None; - Self::diff( + Self::apply_and_diff( &mut reindex_action, &mut source, &mut 
model, @@ -791,7 +791,7 @@ impl SettingsDiff { indexing_embedder: new_indexing_embedder, } = new_sub_embedder; - Self::diff( + Self::apply_and_diff( reindex_action, &mut source, &mut model, @@ -852,7 +852,7 @@ impl SettingsDiff { } #[allow(clippy::too_many_arguments)] - fn diff( + fn apply_and_diff( reindex_action: &mut Option, source: &mut Setting, model: &mut Setting,