From e44325683abc93809eaa8c5ea6cdcfb658248e12 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:40:33 +0200 Subject: [PATCH 01/60] Facet distribution: fix issue where truncated facet distribution would have a wrong order --- milli/src/search/facet/facet_distribution.rs | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 62ae05740..fb1a255f3 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -100,7 +100,6 @@ impl<'a> FacetDistribution<'a> { let mut lexicographic_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); - let distribution_prelength = distribution.len(); let db = self.index.field_id_docid_facet_f64s; for docid in candidates { key_buffer.truncate(mem::size_of::()); @@ -113,23 +112,21 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, _, value), ()) = result?; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; - - if lexicographic_distribution.len() - distribution_prelength - == self.max_values_per_facet - { - break; - } } } - distribution.extend(lexicographic_distribution); + distribution.extend( + lexicographic_distribution + .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())), + ); } FacetType::String => { let mut normalized_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let db = self.index.field_id_docid_facet_strings; - 'outer: for docid in candidates { + for docid in candidates { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db @@ -144,14 +141,14 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_per_facet { - break 'outer; - } + // we'd 
like to break here if we have enough facet values, but we are collecting them by increasing docid, + // so higher ranked facets could be in later docids } } let iter = normalized_distribution .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())) .map(|(_normalized, (original, count))| (original.to_string(), count)); distribution.extend(iter); } @@ -467,7 +464,7 @@ mod tests { .execute() .unwrap(); - milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); let map = FacetDistribution::new(&txn, &index) .facets(iter::once(("colour", OrderBy::Count))) From 23e14138bbb6d62d3e0a8745d538b6b7ac90d8b2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:41:01 +0200 Subject: [PATCH 02/60] facet distribution: implement Display for OrderBy --- milli/src/search/facet/facet_distribution.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index fb1a255f3..a63bb634b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -37,6 +38,15 @@ pub enum OrderBy { Count, } +impl Display for OrderBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OrderBy::Lexicographic => f.write_str("alphabetically"), + OrderBy::Count => f.write_str("by count"), + } + } +} + pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, From a94a87ee5417816db870c1aeb542e0ad37074890 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 11 Sep 2024 11:25:26 +0200 Subject: [PATCH 03/60] Slightly changes existing error messages --- meilisearch/src/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..c7b109598 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -27,9 +27,9 @@ pub enum MeilisearchHttpError { EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), - #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] + #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] FederationOptionsInNonFederatedRequest(usize), - #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] + #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), #[error("A {0} payload is missing.")] MissingPayload(PayloadType), From a48b1d5a791406113964799c4908c775f85b551d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:51:54 +0200 Subject: [PATCH 04/60] Update existing tests following error message changes --- meilisearch/tests/search/multi.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 08ad0b18c..f92b9bfc8 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3799,7 +3799,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { 
".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `limit` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `limit` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3815,7 +3815,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `offset` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `offset` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3831,7 +3831,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `page` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `page` from query #1 or remove 
`federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3847,7 +3847,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `hitsPerPage` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `hitsPerPage` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3875,7 +3875,7 @@ async fn federation_non_federated_contains_federation_option() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #1 or add `federation: {}` to the request.", + "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #1 or add `federation` to the request.", "code": "invalid_multi_search_federation_options", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_federation_options" From 72cc573e0acb2937e2ee66a034291a2f0a5caeb7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:47:00 +0200 
Subject: [PATCH 05/60] Add new error types --- meilisearch-types/src/error.rs | 6 ++++++ meilisearch/src/error.rs | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 0099cada5..bf89fe614 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,10 +238,16 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchSortFacetValuesBy , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index c7b109598..fa315837f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; +use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; @@ -31,6 +32,16 @@ pub enum MeilisearchHttpError { 
FederationOptionsInNonFederatedRequest(usize), #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n Hint: remove `facets` from query #{0} or remove `federation` from the request")] + FacetsInFederatedQuery(usize), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + InconsistentFacetOrder { + facet: String, + previous_facet_order: OrderBy, + previous_uid: String, + index_facet_order: OrderBy, + current_uid: String, + }, #[error("A {0} payload is missing.")] MissingPayload(PayloadType), #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] @@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } + MeilisearchHttpError::FacetsInFederatedQuery(_) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::InconsistentFacetOrder { .. 
} => { + Code::InvalidMultiSearchFacetOrder + } } } } From 57f9517a987579a2ef8759e182622502afcaa353 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:47:15 +0200 Subject: [PATCH 06/60] Required changes to IndexUid --- meilisearch-types/src/index_uid.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index 341ab02cb..d64a6658d 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::error::Error; use std::fmt; use std::str::FromStr; @@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] pub struct IndexUid(String); @@ -70,6 +71,12 @@ impl From for String { } } +impl Borrow for IndexUid { + fn borrow(&self) -> &String { + &self.0 + } +} + #[derive(Debug)] pub struct IndexUidFormatError { pub invalid_uid: String, From 7c084b1286d6b7374d6d034181a418059177c253 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:48:26 +0200 Subject: [PATCH 07/60] SearchQueriesWithIndex changes --- meilisearch/src/search/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..e8e1fec37 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -441,9 +441,6 @@ pub struct SearchQueryWithIndex { } impl SearchQueryWithIndex { - pub fn has_federation_options(&self) -> bool { - self.federation_options.is_some() - } pub fn has_pagination(&self) -> Option<&'static str> { if self.offset.is_some() { Some("offset") @@ -458,6 +455,11 @@ impl SearchQueryWithIndex { } } + 
pub fn has_facets(&self) -> bool { + let Some(facets) = &self.facets else { return false }; + !facets.is_empty() + } + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, From f6114a1ff263655339060e395b5865c2bed11940 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:49:03 +0200 Subject: [PATCH 08/60] Introduce ComputedFacets and compute_facet_distribution_stats --- meilisearch/src/search/mod.rs | 89 ++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index e8e1fec37..99245bdc1 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -989,39 +989,13 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } }; - let (facet_distribution, facet_stats) = match facets { - Some(ref fields) => { - let mut facet_distribution = index.facets_distribution(&rtxn); - - let max_values_by_facet = index - .max_values_per_facet(&rtxn) - .map_err(milli::Error::from)? 
- .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - let sort_facet_values_by = - index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - - if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = - fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); - facet_distribution.facets(fields); - } - - let distribution = facet_distribution - .candidates(candidates) - .default_order_by(sort_facet_values_by.get("*")) - .execute()?; - let stats = facet_distribution.compute_stats()?; - (Some(distribution), Some(stats)) - } - None => (None, None), - }; - - let facet_stats = facet_stats.map(|stats| { - stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() - }); + let (facet_distribution, facet_stats) = facets + .map(move |facets| { + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, None, None) + }) + .transpose()? + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); let result = SearchResult { hits: documents, @@ -1037,6 +1011,55 @@ pub fn perform_search( Ok(result) } +#[derive(Debug, Clone, Default, Serialize)] +pub struct ComputedFacets { + pub distribution: BTreeMap>, + pub stats: BTreeMap, +} + +fn compute_facet_distribution_stats>( + facets: &[S], + index: &Index, + rtxn: &RoTxn, + candidates: roaring::RoaringBitmap, + override_max_values_per_facet: Option, + override_sort_facet_values_by: Option, +) -> Result { + let mut facet_distribution = index.facets_distribution(rtxn); + + let max_values_by_facet = match override_max_values_per_facet { + Some(max_values_by_facet) => max_values_by_facet, + None => index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? 
+ .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET), + }; + + facet_distribution.max_values_per_facet(max_values_by_facet); + + let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; + + let sort_facet_values_by = |n: &str| match override_sort_facet_values_by { + Some(order_by) => order_by, + None => sort_facet_values_by.get(n), + }; + + // add specific facet if there is no placeholder + if facets.iter().all(|f| f.as_ref() != "*") { + let fields: Vec<_> = facets.iter().map(|n| (n, sort_facet_values_by(n.as_ref()))).collect(); + facet_distribution.facets(fields); + } + + let distribution = facet_distribution + .candidates(candidates) + .default_order_by(sort_facet_values_by("*")) + .execute()?; + let stats = facet_distribution.compute_stats()?; + let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); + Ok(ComputedFacets { distribution, stats }) +} + pub fn search_from_kind( search_kind: SearchKind, search: milli::Search<'_>, From 7b55462610d0bfe3a07c0eab6d57c71803619db4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:50:03 +0200 Subject: [PATCH 09/60] BREAKING CHANGE: errors if queries.facets in federated search --- meilisearch/src/search/federated.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 58005ec53..f1acf5aa4 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -342,6 +342,10 @@ pub fn perform_federated_search( .into()); } + if federated_query.has_facets() { + return Err(MeilisearchHttpError::FacetsInFederatedQuery(query_index).into()); + } + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { From 533f1d4345d9069f048ecd50555ec83ad213f87b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 
17:51:20 +0200 Subject: [PATCH 10/60] Federated search: support facets --- meilisearch/src/search/federated.rs | 363 ++++++++++++++++++++++++++-- 1 file changed, 347 insertions(+), 16 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index f1acf5aa4..9d16ca59d 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec}; use actix_http::StatusCode; use index_scheduler::{IndexScheduler, RoFeatures}; +use indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ - InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, + InvalidMultiSearchMergeFacets, InvalidMultiSearchSortFacetValuesBy, InvalidMultiSearchWeight, + InvalidSearchLimit, InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; -use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; use super::ranking_rules::{self, RankingRules}; use super::{ - prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, - SearchQuery, SearchQueryWithIndex, + compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, + HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, }; use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; @@ -73,6 +77,59 @@ pub struct Federation { pub limit: usize, #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, + #[deserr(default, error = DeserrJsonError)] + pub facets_by_index: BTreeMap>>, 
+ #[deserr(default, error = DeserrJsonError)] + pub merge_facets: Option, +} + +#[derive(Copy, Clone, Debug, deserr::Deserr, Default)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct MergeFacets { + #[deserr(default, error = DeserrJsonError)] + pub sort_facet_values_by: SortFacetValuesBy, + #[deserr(default, error = DeserrJsonError)] + pub max_values_per_facet: Option, +} + +impl MergeFacets { + pub fn to_components(this: Option) -> (Option, Option) { + match this { + Some(MergeFacets { sort_facet_values_by, max_values_per_facet }) => { + (sort_facet_values_by.into(), max_values_per_facet) + } + None => (None, None), + } + } +} + +#[derive(Debug, deserr::Deserr, Default, Clone, Copy)] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub enum SortFacetValuesBy { + #[default] + IndexSettings, + /// By lexicographic order... + Alpha, + /// Or by number of docids in common? + Count, +} + +impl From for Option { + fn from(value: SortFacetValuesBy) -> Self { + match value { + SortFacetValuesBy::Alpha => Some(OrderBy::Lexicographic), + SortFacetValuesBy::Count => Some(OrderBy::Count), + SortFacetValuesBy::IndexSettings => None, + } + } +} + +#[derive(Debug, deserr::Deserr, Default)] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub enum GroupFacetsBy { + Facet, + #[default] + Index, } #[derive(Debug, deserr::Deserr)] @@ -82,7 +139,7 @@ pub struct FederatedSearch { #[deserr(default)] pub federation: Option, } -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone)] #[serde(rename_all = "camelCase")] pub struct FederatedSearchResult { pub hits: Vec, @@ -93,6 +150,13 @@ pub struct FederatedSearchResult { #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, + #[serde(skip_serializing_if = 
"FederatedFacets::is_empty")] + pub facets_by_index: FederatedFacets, + // These fields are only used for analytics purposes #[serde(skip)] pub degraded: bool, @@ -109,6 +173,9 @@ impl fmt::Debug for FederatedSearchResult { semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -122,9 +189,18 @@ impl fmt::Debug for FederatedSearchResult { if *degraded { debug.field("degraded", degraded); } + if let Some(facet_distribution) = facet_distribution { + debug.field("facet_distribution", &facet_distribution); + } + if let Some(facet_stats) = facet_stats { + debug.field("facet_stats", &facet_stats); + } if let Some(semantic_hit_count) = semantic_hit_count { debug.field("semantic_hit_count", &semantic_hit_count); } + if !facets_by_index.is_empty() { + debug.field("facets_by_index", &facets_by_index); + } debug.finish() } @@ -313,16 +389,111 @@ struct SearchHitByIndex { } struct SearchResultByIndex { + index: String, hits: Vec, - candidates: RoaringBitmap, + estimated_total_hits: usize, degraded: bool, used_negative_operator: bool, + facets: Option, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FederatedFacets(pub BTreeMap); + +impl FederatedFacets { + pub fn insert(&mut self, index: String, facets: Option) { + if let Some(facets) = facets { + self.0.insert(index, facets); + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn merge( + self, + MergeFacets { sort_facet_values_by, max_values_per_facet }: MergeFacets, + facet_order: Option>, + ) -> Option { + if self.is_empty() { + return None; + } + + let mut distribution: BTreeMap = Default::default(); + let mut stats: BTreeMap = Default::default(); + + for facets_by_index in self.0.into_values() { + for (facet, index_distribution) in facets_by_index.distribution { + match distribution.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + 
entry.insert(index_distribution); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let distribution = entry.get_mut(); + + for (value, index_count) in index_distribution { + distribution + .entry(value) + .and_modify(|count| *count += index_count) + .or_insert(index_count); + } + } + } + } + + for (facet, index_stats) in facets_by_index.stats { + match stats.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_stats); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let stats = entry.get_mut(); + + stats.min = + if stats.min <= index_stats.min { stats.min } else { index_stats.min }; + stats.max = + if stats.max >= index_stats.max { stats.max } else { index_stats.max }; + } + } + } + } + + // fixup order + for (facet, values) in &mut distribution { + let order_by = Option::::from(sort_facet_values_by) + .or_else(|| match &facet_order { + Some(facet_order) => facet_order.get(facet).map(|(_, order)| *order), + None => None, + }) + .unwrap_or_default(); + + match order_by { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + .reverse() + }) + } + } + + if let Some(max_values_per_facet) = max_values_per_facet { + values.truncate(max_values_per_facet) + }; + } + + Some(ComputedFacets { distribution, stats }) + } } pub fn perform_federated_search( index_scheduler: &IndexScheduler, queries: Vec, - federation: Federation, + mut federation: Federation, features: RoFeatures, ) -> Result { let before_search = std::time::Instant::now(); @@ -357,13 +528,29 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; + + let (override_sort_facet_values_by, override_max_values_per_facet) = + MergeFacets::to_components(federation.merge_facets); + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; let mut results_by_index = Vec::with_capacity(queries_by_index.len()); let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + // remember the order and name of first index for each facet when merging with index settings + // to detect if the order is inconsistent for a facet. + let mut facet_order: Option> = match federation.merge_facets + { + Some(MergeFacets { sort_facet_values_by: SortFacetValuesBy::IndexSettings, .. }) => { + Some(Default::default()) + } + _ => None, + }; + for (index_uid, queries) in queries_by_index { + let first_query_index = queries.first().map(|query| query.query_index); + let index = match index_scheduler.index(&index_uid) { Ok(index) => index, Err(err) => { @@ -371,9 +558,8 @@ pub fn perform_federated_search( // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but // here the resource not found is not part of the URL. 
err.code = StatusCode::BAD_REQUEST; - if let Some(query) = queries.first() { - err.message = - format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + if let Some(query_index) = first_query_index { + err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); } return Err(err); } @@ -398,6 +584,23 @@ pub fn perform_federated_search( let mut used_negative_operator = false; let mut candidates = RoaringBitmap::new(); + let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); + + // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", + if let Some(query_index) = first_query_index { + format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + return Err(error); + } + // 2.1. 
Compute all candidates for each query in the index let mut results_by_query = Vec::with_capacity(queries.len()); @@ -566,34 +769,118 @@ pub fn perform_federated_search( .collect(); let merged_result = merged_result?; + + let estimated_total_hits = candidates.len() as usize; + + let facets = facets_by_index + .map(|facets_by_index| { + compute_facet_distribution_stats( + &facets_by_index, + &index, + &rtxn, + candidates, + override_max_values_per_facet, + override_sort_facet_values_by, + ) + }) + .transpose() + .map_err(|mut error| { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", + error.message, + if let Some(query_index) = first_query_index { + format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + error + })?; + results_by_index.push(SearchResultByIndex { + index: index_uid, hits: merged_result, - candidates, + estimated_total_hits, degraded, used_negative_operator, + facets, }); } + // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. + for (index_uid, facets) in federation.facets_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. 
+ err.code = StatusCode::BAD_REQUEST; + err.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", + err.message + ); + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n Note: index `{index_uid}` is not used in queries", + ); + return Err(error); + } + + if let Some(facets) = facets { + if let Err(mut error) = compute_facet_distribution_stats( + &facets, + &index, + &rtxn, + Default::default(), + override_max_values_per_facet, + override_sort_facet_values_by, + ) { + error.message = + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); + return Err(error); + } + } + } + // 3. 
merge hits and metadata across indexes // 3.1 merge metadata - let (estimated_total_hits, degraded, used_negative_operator) = { + let (estimated_total_hits, degraded, used_negative_operator, facets) = { let mut estimated_total_hits = 0; let mut degraded = false; let mut used_negative_operator = false; + let mut facets: FederatedFacets = FederatedFacets::default(); + for SearchResultByIndex { + index, hits: _, - candidates, + estimated_total_hits: estimated_total_hits_by_index, + facets: facets_by_index, degraded: degraded_by_index, used_negative_operator: used_negative_operator_by_index, - } in &results_by_index + } in &mut results_by_index { - estimated_total_hits += candidates.len() as usize; + estimated_total_hits += *estimated_total_hits_by_index; degraded |= *degraded_by_index; used_negative_operator |= *used_negative_operator_by_index; + + let facets_by_index = std::mem::take(facets_by_index); + let index = std::mem::take(index); + + facets.insert(index, facets_by_index); } - (estimated_total_hits, degraded, used_negative_operator) + (estimated_total_hits, degraded, used_negative_operator, facets) }; // 3.2 merge hits @@ -610,6 +897,18 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); + let (facet_distribution, facet_stats, facets_by_index) = match federation.merge_facets { + Some(merge_facets) => { + let facets = facets.merge(merge_facets, facet_order); + + let (facet_distribution, facet_stats) = + facets.map(|ComputedFacets { distribution, stats }| (distribution, stats)).unzip(); + + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; + let search_result = FederatedSearchResult { hits: merged_hits, processing_time_ms: before_search.elapsed().as_millis(), @@ -621,7 +920,39 @@ pub fn perform_federated_search( semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, }; Ok(search_result) } + +fn check_facet_order( + facet_order: &mut 
Option>, + current_index: &str, + facets_by_index: &Option>, + index: &milli::Index, + rtxn: &milli::heed::RoTxn<'_>, +) -> Result<(), ResponseError> { + if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { + let index_facet_order = index.sort_facet_values_by(rtxn)?; + for facet in facets_by_index { + let index_facet_order = index_facet_order.get(facet); + let (previous_index, previous_facet_order) = facet_order + .entry(facet.to_owned()) + .or_insert_with(|| (current_index.to_owned(), index_facet_order)); + if previous_facet_order != &index_facet_order { + return Err(MeilisearchHttpError::InconsistentFacetOrder { + facet: facet.clone(), + previous_facet_order: *previous_facet_order, + previous_uid: previous_index.clone(), + current_uid: current_index.to_owned(), + index_facet_order, + } + .into()); + } + } + }; + Ok(()) +} From 47e3c4b5c36302b83e492c307435429f333593b8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:52:13 +0200 Subject: [PATCH 11/60] Add new tests --- meilisearch/tests/search/multi.rs | 2037 +++++++++++++++++++++++++++++ 1 file changed, 2037 insertions(+) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index f92b9bfc8..f9da8877d 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3855,6 +3855,214 @@ async fn federation_federated_contains_pagination() { "###); } +#[actix_rt::test] +async fn federation_federated_contains_facets() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // empty facets are actually OK + let (response, code) = server + 
.multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": []}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.953042328042328 + } + }, + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.9093915343915344 + } + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.4393939393939394 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": ["BOOSTED"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n Hint: remove `facets` from query #1 or remove `federation` from the request", + "code": "invalid_multi_search_query_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" + } + "###); +} + +#[actix_rt::test] +async fn federation_non_faceted_for_an_index() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", 
"id", "name"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-name"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-facets"); + + let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits-no-name", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // still fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id"], + "fruits-no-facets": ["BOOST", "id"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // also fails + let (response, code) = server + 
.multi_search(json!({"federation": { + "facetsByIndex": { + "zorglub": ["BOOST", "id", "name"], + "fruits": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n Note: index `zorglub` is not used in queries", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn federation_non_federated_contains_federation_option() { let server = Server::new().await; @@ -4433,3 +4641,1832 @@ async fn federation_vector_two_indexes() { } "###); } + +#[actix_rt::test] +async fn federation_facets_different_indexes_same_facet() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman-2"); + + let documents = SCORE_DOCUMENTS.clone(); + 
let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return titles ordered accross indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "batman-2": ["title"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + 
"weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: 
The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title"], + "batman": ["title"], + "batman-2": ["title"] + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + 
"weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "title": { + "Badman": 2, + "Batman": 2, + "Batman Returns": 2, + "Batman the dark knight returns: Part 1": 2, + "Batman the dark knight returns: Part 2": 2, + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "facetStats": {} + } + "###); + + // mix and match query: will be sorted across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": [], + "batman": ["title"], + "batman-2": ["title"] + } + }, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": 
["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.5 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 11, + 
"facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": {}, + "stats": {} + } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_facets_same_indexes() { + let server = Server::new().await; + + let index = server.index("doggos"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("doggos-2"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + 
"hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 2, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 
4, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1 + }, + "mother": { + "michelle": 1, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + }, + "doggos-2": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1 + }, + "father": { + "jean": 1, + "romain": 1 + }, + "mother": { + "michelle": 2 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 4.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + }, + "mergeFacets": {}, + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetDistribution": { + "doggos.age": { + "2": 2, + "4": 2, + "5": 1, + "6": 1 + }, + "father": { + "jean": 2, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 3, + "sophie": 1 + } + }, + "facetStats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 
+ } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_inconsistent_merge_order() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("movies-2"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "faceting": { + "sortFacetValuesBy": { "color": "count" } + } + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // without merging, it works + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], 
"attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + 
"title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + }, + "movies-2": { + "distribution": { + "color": { + "red": 3, + "blue": 3, + "yellow": 2, + "green": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + // fails with merging + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" 
+ { + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "code": "invalid_multi_search_facet_order", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" + } + "###); + + // works again with merging and forcing an order + let (response, code) = server +.multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "count" + } +}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, +]})) +.await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + 
"title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "red": 6, + "blue": 6, + "yellow": 4, + "green": 4 + }, + "title": { + "Shazam!": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Gläss": 2, + "Escape Room": 2, + "Captain Marvel": 2, + "Batman the dark knight returns: Part 2": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman Returns": 1, + "Batman": 1, + "Badman": 1 + } + }, + "facetStats": {} + } + "###); + + // works also with the other order + let (response, code) = server + 
.multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "alpha" + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": 
"movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 6, + "green": 4, + "red": 6, + "yellow": 4 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1, + "Captain Marvel": 2, + "Escape Room": 2, + "Gläss": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Shazam!": 2 + } + }, + "facetStats": {} + } + "###); + + // can limit the number of values + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "count", + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + 
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": 
"Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "red": 6, + "blue": 6, + "yellow": 4 + }, + "title": { + "Shazam!": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Gläss": 2 + } + }, + "facetStats": {} + } + "###); + + // can limit the number of values by alpha + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "alpha", + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + 
"queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 6, + "green": 4, + "red": 6 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1 + } + }, + "facetStats": {} + } + "###); +} From 91dfab317f2c53f1556fcd3eacb8cb1980c50fce Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:17:46 +0200 Subject: [PATCH 12/60] New error --- meilisearch-types/src/error.rs | 1 + 1 file changed, 1 insertion(+) 
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index bf89fe614..d443e5709 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,6 +238,7 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; From 38c4be1c8e6de30b237179fc8eca15f9cf4eb08c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:09 +0200 Subject: [PATCH 13/60] compute_facets accepts Route argument to fixup error code --- meilisearch/src/search/mod.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 99245bdc1..13cfb9334 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -991,7 +991,15 @@ pub fn perform_search( let (facet_distribution, facet_stats) = facets .map(move |facets| { - compute_facet_distribution_stats(&facets, index, &rtxn, candidates, None, None) + compute_facet_distribution_stats( + &facets, + index, + &rtxn, + candidates, + None, + None, + Route::Search, + ) }) .transpose()? 
.map(|ComputedFacets { distribution, stats }| (distribution, stats)) @@ -1017,6 +1025,11 @@ pub struct ComputedFacets { pub stats: BTreeMap, } +enum Route { + Search, + MultiSearch, +} + fn compute_facet_distribution_stats>( facets: &[S], index: &Index, @@ -1024,6 +1037,7 @@ fn compute_facet_distribution_stats>( candidates: roaring::RoaringBitmap, override_max_values_per_facet: Option, override_sort_facet_values_by: Option, + route: Route, ) -> Result { let mut facet_distribution = index.facets_distribution(rtxn); @@ -1054,7 +1068,16 @@ fn compute_facet_distribution_stats>( let distribution = facet_distribution .candidates(candidates) .default_order_by(sort_facet_values_by("*")) - .execute()?; + .execute() + .map_err(|error| match (error, route) { + ( + error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution { + .. + }), + Route::MultiSearch, + ) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets), + (error, _) => error.into(), + })?; let stats = facet_distribution.compute_stats()?; let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); Ok(ComputedFacets { distribution, stats }) From 95da428dc8c1ed55dcb8e8b866caf974e0569be4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:23 +0200 Subject: [PATCH 14/60] Use route in federated --- meilisearch/src/search/federated.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 9d16ca59d..6470002ab 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -781,6 +781,7 @@ pub fn perform_federated_search( candidates, override_max_values_per_facet, override_sort_facet_values_by, + super::Route::MultiSearch, ) }) .transpose() @@ -844,6 +845,7 @@ pub fn perform_federated_search( Default::default(), override_max_values_per_facet, override_sort_facet_values_by, + super::Route::MultiSearch, ) { error.message = 
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); From 6732dd95d77f605bf946139844e5379d57fed320 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:32 +0200 Subject: [PATCH 15/60] Update tests --- meilisearch/tests/search/multi.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index f9da8877d..0eeca4ce9 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3989,9 +3989,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); @@ -4011,9 +4011,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); @@ -4034,9 +4034,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); From dc8a662209395378dc9a6c17e3f1def3f00a218c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 10:08:21 +0200 Subject: [PATCH 16/60] federated queries: adjust error message --- meilisearch/src/error.rs | 8 ++++---- meilisearch/src/search/federated.rs | 10 ++++++++-- meilisearch/src/search/mod.rs | 5 ++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index fa315837f..9d5eff016 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -32,9 +32,9 @@ pub enum MeilisearchHttpError { FederationOptionsInNonFederatedRequest(usize), #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), - #[error("Inside `.queries[{0}]`: Using facet options 
is not allowed in federated queries.\n Hint: remove `facets` from query #{0} or remove `federation` from the request")] - FacetsInFederatedQuery(usize), - #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] + FacetsInFederatedQuery(usize, String, Vec), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] InconsistentFacetOrder { facet: String, previous_facet_order: OrderBy, @@ -107,7 +107,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } - MeilisearchHttpError::FacetsInFederatedQuery(_) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets, MeilisearchHttpError::InconsistentFacetOrder { .. 
} => { Code::InvalidMultiSearchFacetOrder } diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 6470002ab..46643556d 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -513,8 +513,14 @@ pub fn perform_federated_search( .into()); } - if federated_query.has_facets() { - return Err(MeilisearchHttpError::FacetsInFederatedQuery(query_index).into()); + if let Some(facets) = federated_query.has_facets() { + let facets = facets.to_owned(); + return Err(MeilisearchHttpError::FacetsInFederatedQuery( + query_index, + federated_query.index_uid.into_inner(), + facets, + ) + .into()); } let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 13cfb9334..4d5d8d890 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -455,9 +455,8 @@ impl SearchQueryWithIndex { } } - pub fn has_facets(&self) -> bool { - let Some(facets) = &self.facets else { return false }; - !facets.is_empty() + pub fn has_facets(&self) -> Option<&[String]> { + self.facets.as_deref().filter(|v| !v.is_empty()) } pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { From d9e0df74eaa7e0f00298520a3181183b14b8bec5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 10:09:01 +0200 Subject: [PATCH 17/60] update test --- meilisearch/tests/search/multi.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 0eeca4ce9..662d10a4c 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3931,7 +3931,7 @@ async fn federation_federated_contains_facets() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using facet options is not 
allowed in federated queries.\n Hint: remove `facets` from query #1 or remove `federation` from the request", + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits: [\"BOOSTED\"]` for facets in federated search", "code": "invalid_multi_search_query_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From da0dd6febf9a0277b8a66c0d40b297553f41b3c9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:29:39 +0200 Subject: [PATCH 18/60] Make embedder mandatory --- meilisearch/src/routes/indexes/similar.rs | 6 +++--- meilisearch/src/search/mod.rs | 16 +++++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs 
index 5027a473e..dd30c793e 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -103,7 +103,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -139,8 +139,8 @@ pub struct SimilarQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError, default)] pub ranking_score_threshold: Option, - #[deserr(default, error = DeserrQueryParamError)] - pub embedder: Option, + #[deserr(error = DeserrQueryParamError)] + pub embedder: String, } #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..cca05a25d 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -267,8 +267,8 @@ impl fmt::Debug for SearchQuery { pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] pub semantic_ratio: SemanticRatio, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, } #[derive(Clone)] @@ -282,7 +282,7 @@ impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, ) -> Result { let (embedder_name, embedder) = @@ -293,7 +293,7 @@ impl SearchKind { pub(crate) fn hybrid( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, semantic_ratio: f32, vector_len: Option, ) -> Result { @@ -305,14 +305,12 @@ impl SearchKind { pub(crate) fn embedder( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + 
embedder_name: &str, vector_len: Option, ) -> Result<(String, Arc), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name()); - let embedder = embedders.get(embedder_name); let embedder = embedder @@ -537,8 +535,8 @@ pub struct SimilarQuery { pub limit: usize, #[deserr(default, error = DeserrJsonError)] pub filter: Option, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, #[deserr(default, error = DeserrJsonError)] pub attributes_to_retrieve: Option>, #[deserr(default, error = DeserrJsonError)] From 3c5e36355405900e6744baffd8a697df87354e1d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:30:13 +0200 Subject: [PATCH 19/60] Remove default embedders --- milli/src/vector/mod.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 04e646819..23417ced2 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -144,11 +144,6 @@ impl EmbeddingConfigs { self.0.get(name).cloned() } - /// Get the default embedder configuration, if any. - pub fn get_default(&self) -> Option<(Arc, Arc)> { - self.get(self.get_default_embedder_name()) - } - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { &self.0 } @@ -156,24 +151,6 @@ impl EmbeddingConfigs { pub fn into_inner(self) -> HashMap, Arc)> { self.0 } - - /// Get the name of the default embedder configuration. - /// - /// The default embedder is determined as follows: - /// - /// - If there is only one embedder, it is always the default. - /// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder. - /// - In all other cases, there is no default embedder. 
- pub fn get_default_embedder_name(&self) -> &str { - let mut it = self.0.keys(); - let first_name = it.next(); - let second_name = it.next(); - match (first_name, second_name) { - (None, _) => "default", - (Some(first), None) => first, - (Some(_), Some(_)) => "default", - } - } } impl IntoIterator for EmbeddingConfigs { From 2fdb1d8018dda972f313c2a92406c115f48baf89 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:28:06 +0200 Subject: [PATCH 20/60] SearchQueryGet can fail --- meilisearch/src/routes/indexes/search.rs | 35 ++++++++++++++++-------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 362bc9937..b7b75bc89 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet { } } -impl From for SearchQuery { - fn from(other: SearchQueryGet) -> Self { +impl TryFrom for SearchQuery { + type Error = ResponseError; + + fn try_from(other: SearchQueryGet) -> Result { let filter = match other.filter { Some(f) => match serde_json::from_str(&f) { Ok(v) => Some(v), @@ -140,19 +142,28 @@ impl From for SearchQuery { let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) { (None, None) => None, - (None, Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) + (None, Some(_)) => { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(), + meilisearch_types::error::Code::InvalidHybridQuery, + )); + } + (Some(embedder), None) => { + Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder }) } - (Some(embedder), None) => Some(HybridQuery { - semantic_ratio: DEFAULT_SEMANTIC_RATIO(), - embedder: Some(embedder), - }), (Some(embedder), Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: 
Some(embedder) }) + Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder }) } }; - Self { + if other.vector.is_some() && hybrid.is_none() { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `vector` is present".into(), + meilisearch_types::error::Code::MissingSearchHybrid, + )); + } + + Ok(Self { q: other.q, vector: other.vector.map(CS::into_inner), offset: other.offset.0, @@ -179,7 +190,7 @@ impl From for SearchQuery { hybrid, ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), locales: other.locales.map(|o| o.into_iter().collect()), - } + }) } } @@ -219,7 +230,7 @@ pub async fn search_with_url_query( debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let mut query: SearchQuery = params.into_inner().into(); + let mut query: SearchQuery = params.into_inner().try_into()?; // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { From 5239ae0297d2253ad38f3a4ec8204b5ed0f9296d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:28:40 +0200 Subject: [PATCH 21/60] Rework search kind so that a search without query but with vector is a vector search regardless of semantic ratio --- meilisearch/src/routes/indexes/search.rs | 56 ++++++++++-------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index b7b75bc89..6a8eee521 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -323,44 +323,36 @@ pub fn search_kind( features.check_vector("Passing `hybrid` as a parameter")?; } - // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing - if query.vector.is_none() { - match &query.q { - Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly), - None => return 
Ok(SearchKind::KeywordOnly), - _ => {} + // handle with care, the order of cases matters, the semantics is subtle + match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) { + // empty query, no vector => placeholder search + (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly), + // no query, no vector => placeholder search + (None, _, None) => Ok(SearchKind::KeywordOnly), + // hybrid.semantic_ratio == 1.0 => vector + (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { + SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) } - } - - match &query.hybrid { - Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => { - Ok(SearchKind::semantic( - index_scheduler, - index, - embedder.as_deref(), - query.vector.as_ref().map(Vec::len), - )?) - } - Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => { + // hybrid.semantic_ratio == 0.0 => keyword + (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { Ok(SearchKind::KeywordOnly) } - Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid( + // no query, hybrid, vector => semantic + (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { + SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + } + // query, no hybrid, no vector => keyword + (Some(_), None, None) => Ok(SearchKind::KeywordOnly), + // query, hybrid, maybe vector => hybrid + (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, index, - embedder.as_deref(), + embedder, **semantic_ratio, - query.vector.as_ref().map(Vec::len), - )?), - None => match (query.q.as_deref(), query.vector.as_deref()) { - (_query, None) => Ok(SearchKind::KeywordOnly), - (None, Some(_vector)) => Ok(SearchKind::semantic( - index_scheduler, - index, - None, - query.vector.as_ref().map(Vec::len), - )?), - (Some(_), Some(_)) => 
Err(MeilisearchHttpError::MissingSearchHybrid.into()), - }, + v.map(|v| v.len()), + ), + + (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), } } From cac5836f6fee7f049977e4a049edea94869e56e7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:27:00 +0200 Subject: [PATCH 22/60] Remove hybrid.embedder boolean from analytics because embedder is now mandatory --- meilisearch/src/analytics/segment_analytics.rs | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 07350d506..f8d6a0fdc 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -646,8 +646,6 @@ pub struct SearchAggregator { max_vector_size: usize, // Whether the semantic ratio passed to a hybrid search equals the default ratio. semantic_ratio: bool, - // Whether a non-default embedder was specified - embedder: bool, hybrid: bool, retrieve_vectors: bool, @@ -795,7 +793,6 @@ impl SearchAggregator { if let Some(hybrid) = hybrid { ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.embedder = hybrid.embedder.is_some(); ret.hybrid = true; } @@ -863,7 +860,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -923,7 +919,6 @@ impl SearchAggregator { self.retrieve_vectors |= retrieve_vectors; self.semantic_ratio |= semantic_ratio; self.hybrid |= hybrid; - self.embedder |= embedder; // pagination self.max_limit = self.max_limit.max(max_limit); @@ -999,7 +994,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -1051,7 +1045,6 @@ impl SearchAggregator { "hybrid": { "enabled": hybrid, "semantic_ratio": semantic_ratio, - "embedder": embedder, }, 
"pagination": { "max_limit": max_limit, @@ -1782,7 +1775,6 @@ pub struct SimilarAggregator { used_syntax: HashMap, // Whether a non-default embedder was specified - embedder: bool, retrieve_vectors: bool, // pagination @@ -1803,7 +1795,7 @@ impl SimilarAggregator { pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { id: _, - embedder, + embedder: _, offset, limit, attributes_to_retrieve: _, @@ -1851,7 +1843,6 @@ impl SimilarAggregator { ret.show_ranking_score_details = *show_ranking_score_details; ret.ranking_score_threshold = ranking_score_threshold.is_some(); - ret.embedder = embedder.is_some(); ret.retrieve_vectors = *retrieve_vectors; ret @@ -1883,7 +1874,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = other; @@ -1914,7 +1904,6 @@ impl SimilarAggregator { *used_syntax = used_syntax.saturating_add(value); } - self.embedder |= embedder; self.retrieve_vectors |= retrieve_vectors; // pagination @@ -1948,7 +1937,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = self; @@ -1980,9 +1968,6 @@ impl SimilarAggregator { "vector": { "retrieve_vectors": retrieve_vectors, }, - "hybrid": { - "embedder": embedder, - }, "pagination": { "max_limit": max_limit, "max_offset": max_offset, From a35a339c3d1e4e0c720a164328c76b59983e8242 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:27:35 +0200 Subject: [PATCH 23/60] Touchup error message --- meilisearch/src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..e0c9006db 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -61,7 +61,7 @@ pub enum MeilisearchHttpError { DocumentFormat(#[from] DocumentFormatError), #[error(transparent)] 
Join(#[from] JoinError), - #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")] + #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")] MissingSearchHybrid, } From 1120a5296ce65e77cc1d4f6528ac00eaa4b9780c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:30:04 +0200 Subject: [PATCH 24/60] Update tests --- meilisearch/tests/search/hybrid.rs | 65 ++++++++------- meilisearch/tests/search/mod.rs | 35 +++++--- meilisearch/tests/similar/errors.rs | 120 +++++++++++++++++---------- meilisearch/tests/similar/mod.rs | 49 ++++++----- meilisearch/tests/vector/mod.rs | 9 +- meilisearch/tests/vector/openai.rs | 32 +++---- meilisearch/tests/vector/settings.rs | 3 +- 7 files changed, 185 insertions(+), 128 deletions(-) diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index ee4181694..e301c0b05 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -128,7 +128,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -137,7 +137,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -146,7 +146,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, 
"retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -161,7 +161,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -174,7 +174,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -188,8 +188,11 @@ async fn simple_search_hf() { let server = Server::new().await; let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - let (response, code) = - index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}), + ) + .await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); snapshot!(response["semanticHitCount"], @"0"); @@ -197,7 +200,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( // disable ranking score as the vectors between architectures are not equal - json!({"q": "Captain", 
"hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -206,7 +209,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -215,7 +218,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -224,7 +227,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -237,7 +240,7 @@ async fn distribution_shift() { let server = Server::new().await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; - let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); + let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}); let (response, code) = index.search_post(search.clone()).await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam 
ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); @@ -271,7 +274,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.2}, + "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true, "attributesToHighlight": [ "desc", @@ -287,7 +290,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.8}, + "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -304,7 +307,7 @@ async fn highlighter() { // no highlighting on full semantic let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 1.0}, + "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}), + json!({"q": "Captain", "vector": 
[1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}), ) .unwrap(), ) @@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}), ) .unwrap(), ) @@ -398,7 +401,7 @@ async fn single_document() { let (response, code) = index .search_post( - json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -414,7 +417,7 @@ async fn query_combination() { // search without query and vector, but with hybrid => still placeholder let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -423,7 +426,7 @@ async fn query_combination() { // same with a different semantic ratio let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -432,7 +435,7 @@ async fn query_combination() { // 
wrong vector dimensions let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"400 Bad Request"); @@ -447,7 +450,7 @@ async fn query_combination() { // full vector let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -456,7 +459,7 @@ async fn query_combination() { // full keyword, without a query let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -465,7 +468,7 @@ async fn query_combination() { // query + vector, full keyword => keyword let (response, code) = index - .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -480,7 +483,7 @@ async fn query_combination() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.", + "message": "Invalid request: missing `hybrid` parameter when 
`vector` is present.", "code": "missing_search_hybrid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#missing_search_hybrid" @@ -490,7 +493,7 @@ async fn query_combination() { // full vector, without a vector => error let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -507,7 +510,7 @@ async fn query_combination() { // hybrid without a vector => full keyword let (response, code) = index .search_post( - json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -523,7 +526,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -573,7 +576,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 974025652..d1091d944 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() { index.add_documents(json!(documents), None).await; index.wait_task(0).await; - index - .search(json!({ + let (response, code) = index + 
.search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true - }), |response, code|{ - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }) + })) .await; + + { + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + } + index .search(json!({ "retrieveVectors": true, @@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() { let (response, code) = index .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true, "retrieveVectors": true, })) diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..be8dabee7 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -18,7 +18,7 @@ async fn similar_unexisting_index() { }); index - .similar(json!({"id": 287947}), |response, code| { + .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| { assert_eq!(code, 404); assert_eq!(response, expected_response); }) @@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"id": 287947})).await; + let 
(response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -199,7 +199,8 @@ async fn similar_not_found_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await; + let (response, code) = + index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -230,7 +231,8 @@ async fn similar_bad_offset() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -241,7 +243,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("?id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -272,7 +274,8 @@ async fn similar_bad_limit() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -283,7 +286,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("?id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); 
snapshot!(json_string!(response), @r###" { @@ -323,7 +326,8 @@ async fn similar_bad_filter() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await; + let (response, code) = + index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -361,7 +365,7 @@ async fn filter_invalid_syntax_object() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { + .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -400,7 +404,7 @@ async fn filter_invalid_syntax_array() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { + .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() { }); index .similar( - json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}), + json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}), |response, code| { assert_eq!(response, expected_response); assert_eq!(code, 400); @@ -486,10 +490,13 @@ async fn 
filter_invalid_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -638,10 +654,13 @@ async fn 
filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ 
-765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() { server.set_features(json!({"vectorStore": true})).await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() { } "###); - let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/similar/mod.rs b/meilisearch/tests/similar/mod.rs index b4c95b059..fa0797a41 100644 --- a/meilisearch/tests/similar/mod.rs +++ b/meilisearch/tests/similar/mod.rs @@ -80,9 +80,11 @@ async fn basic() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -154,13 +156,16 @@ async fn basic() { } ] "###); - }) + }, + ) .await; index - .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "How to Train Your Dragon: The Hidden World", @@ -232,7 +237,8 @@ async fn basic() { } ] "###); - }) + }, + ) .await; } @@ -272,7 +278,7 
@@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); @@ -358,7 +364,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); @@ -426,7 +432,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); @@ -476,7 +482,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); @@ -508,7 +514,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, 
"embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @"[]"); @@ -553,7 +559,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -617,7 +623,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -681,9 +687,11 @@ async fn limit_and_offset() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -704,12 +712,13 @@ async fn limit_and_offset() { } ] "###); - }) + }, + ) .await; index .similar( - json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), + json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 7c9b375d9..0e38c1366 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -624,7 +624,8 @@ async fn clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": 
[1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await; snapshot!(documents, @r###" { "hits": [], @@ -685,7 +686,11 @@ async fn add_remove_one_vector_4588() { let task = index.wait_task(value.uid()).await; snapshot!(task, name: "document-deleted"); - let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; + let (documents, _code) = index + .search_post( + json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }), + ) + .await; snapshot!(documents, @r###" { "hits": [ diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs index 2ede7df15..04c068c40 100644 --- a/meilisearch/tests/vector/openai.rs +++ b/meilisearch/tests/vector/openai.rs @@ -449,7 +449,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, })) .await; snapshot!(code, @"200 OK"); @@ -489,7 +489,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -529,7 +529,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -616,7 +616,7 @@ async fn tokenize_long_text() { "q": "grand chien de berger des montagnes", "showRankingScore": true, "attributesToRetrieve": ["id"], - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1064,7 +1064,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - 
"hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1104,7 +1104,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1144,7 +1144,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1295,7 +1295,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1335,7 +1335,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1375,7 +1375,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 
OK"); @@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1756,7 +1756,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1796,7 +1796,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1836,7 +1836,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 0714a22ca..4f07ca18b 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -218,7 +218,8 @@ async fn reset_embedder_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await; snapshot!(json_string!(documents), @r###" { "message": "Cannot find embedder with name `default`.", From 9f1fb4b425bbbb78cd0c972add760710696a5d15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 17 Sep 2024 16:44:11 +0200 Subject: [PATCH 25/60] Introduce the STARTS WITH filter operator gated under an experimental feature --- filter-parser/src/condition.rs | 29 
++++++++++++++++++++++++++ filter-parser/src/error.rs | 2 +- filter-parser/src/lib.rs | 35 +++++++++++++++++++++----------- filter-parser/src/value.rs | 2 ++ index-scheduler/src/features.rs | 2 +- milli/src/search/facet/filter.rs | 20 +++++++++++++++++- 6 files changed, 75 insertions(+), 15 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index 679555a89..04b6dc266 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -27,6 +27,7 @@ pub enum Condition<'a> { LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, Contains { keyword: Token<'a>, word: Token<'a> }, + StartsWith { keyword: Token<'a>, word: Token<'a> }, } /// condition = value ("==" | ">" ...) value @@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult { )) } +/// starts with = value "STARTS WITH" value +pub fn parse_starts_with(input: Span) -> IResult { + let (input, (fid, starts_with, value)) = + tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?; + Ok(( + input, + FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + }, + )) +} + +/// starts with = value "NOT" WS+ "STARTS WITH" value +pub fn parse_not_starts_with(input: Span) -> IResult { + let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH"))); + let (input, (fid, (_not, _spaces, starts_with), value)) = + tuple((parse_value, keyword, cut(parse_value)))(input)?; + + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + })), + )) +} + /// to = value value "TO" WS+ value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, _, to)) = diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index f530cc690..122396b87 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -146,7 +146,7 @@ impl<'a>
Display for Error<'a> { } ErrorKind::InvalidPrimary => { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } ErrorKind::InvalidEscapedNumber => { writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d06154f25..9c323660e 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -49,7 +49,7 @@ use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; use condition::{ parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, - parse_is_null, parse_not_contains, parse_not_exists, + parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with, }; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; @@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> { | Condition::LowerThan(_) | Condition::LowerThanOrEqual(_) | Condition::Between { .. 
} => None, - Condition::Contains { keyword, word: _ } => Some(keyword), + Condition::Contains { keyword, word: _ } + | Condition::StartsWith { keyword, word: _ } => Some(keyword), }, FilterCondition::Not(this) => this.use_contains_operator(), FilterCondition::Or(seq) | FilterCondition::And(seq) => { @@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_to, parse_contains, parse_not_contains, + parse_starts_with, + parse_not_starts_with, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo, parse_geo_distance, @@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"), Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"), + Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"), } } } @@ -680,6 +684,13 @@ pub mod tests { insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + // Test STARTS WITH + NOT STARTS WITH + insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + // Test nested NOT insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); @@ -851,12 +862,12 @@ pub mod tests { "###); 
insta::assert_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -919,35 +930,35 @@ pub mod tests { "###); insta::assert_snapshot!(p(r#"value NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. 
1:11 value NULL "###); insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. 1:15 value NOT NULL "###); insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 1:12 value EMPTY "###); insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. 
1:16 value NOT EMPTY "###); insta::assert_snapshot!(p(r#"value IS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. 1:9 value IS "###); insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 1:13 value IS NOT "###); insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. 
1:16 value IS EXISTS "###); insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. 1:20 value IS NOT EXISTS "###); } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 06ec1daef..5912f6900 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool { | "NULL" | "EMPTY" | "CONTAINS" + | "STARTS" + | "WITH" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c998ff444..f4ac80511 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -87,7 +87,7 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "Using `CONTAINS` in a filter", + disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter", feature: "contains filter", issue_link: "https://github.com/orgs/meilisearch/discussions/763", } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 9ce201aca..c059d2d27 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -12,7 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use 
crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ @@ -336,6 +336,24 @@ impl<'a> Filter<'a> { return Ok(docids); } + Condition::StartsWith { keyword: _, word } => { + let value = crate::normalize_facet(word.value()); + let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() }; + let docids = strings_db + .prefix_iter(rtxn, &base)? + .map(|result| -> Result { + match result { + Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap), + Err(_e) => Err(InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into()), + } + }) + .union()?; + + return Ok(docids); + } }; let mut output = RoaringBitmap::new(); From 54d3ba3357ec08744191768ed33e4e70598c90cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 17 Sep 2024 16:59:22 +0200 Subject: [PATCH 26/60] Fix tests that check error message content --- filter-parser/src/lib.rs | 6 +++--- meilisearch/tests/documents/errors.rs | 6 +++--- meilisearch/tests/search/errors.rs | 4 ++-- meilisearch/tests/similar/errors.rs | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 9c323660e..cfe009acb 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -762,7 +762,7 @@ pub mod tests { "###); insta::assert_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 
1:5 'OR' "###); @@ -772,12 +772,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 
19:19 channel = Ponce OR "###); diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 280073f51..4c644ae98 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -525,7 +525,7 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -723,7 +723,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); 
snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index fee7eef7d..0086c6af9 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() { .search(json!({"filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() { .search(json!({"filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting 
an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..e8f1e8bf4 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -364,7 +364,7 @@ async fn filter_invalid_syntax_object() { .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -403,7 +403,7 @@ async fn filter_invalid_syntax_array() { .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation 
`=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" From 98b77aec668cc43bb15d4e5c373d3426d64c5fc3 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:22:03 +0200 Subject: [PATCH 27/60] Remove runtime sortFacetValuesBy --- meilisearch-types/src/error.rs | 1 - meilisearch/src/search/federated.rs | 81 ++++++++--------------------- 2 files changed, 21 insertions(+), 61 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index d443e5709..535bf2dd6 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -248,7 +248,6 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchSortFacetValuesBy , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 46643556d..804d56689 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -13,8 +13,8 @@ use 
indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, - InvalidMultiSearchMergeFacets, InvalidMultiSearchSortFacetValuesBy, InvalidMultiSearchWeight, - InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, + InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; @@ -86,44 +86,10 @@ pub struct Federation { #[derive(Copy, Clone, Debug, deserr::Deserr, Default)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct MergeFacets { - #[deserr(default, error = DeserrJsonError)] - pub sort_facet_values_by: SortFacetValuesBy, #[deserr(default, error = DeserrJsonError)] pub max_values_per_facet: Option, } -impl MergeFacets { - pub fn to_components(this: Option) -> (Option, Option) { - match this { - Some(MergeFacets { sort_facet_values_by, max_values_per_facet }) => { - (sort_facet_values_by.into(), max_values_per_facet) - } - None => (None, None), - } - } -} - -#[derive(Debug, deserr::Deserr, Default, Clone, Copy)] -#[deserr(rename_all = camelCase, deny_unknown_fields)] -pub enum SortFacetValuesBy { - #[default] - IndexSettings, - /// By lexicographic order... - Alpha, - /// Or by number of docids in common? 
- Count, -} - -impl From for Option { - fn from(value: SortFacetValuesBy) -> Self { - match value { - SortFacetValuesBy::Alpha => Some(OrderBy::Lexicographic), - SortFacetValuesBy::Count => Some(OrderBy::Count), - SortFacetValuesBy::IndexSettings => None, - } - } -} - #[derive(Debug, deserr::Deserr, Default)] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum GroupFacetsBy { @@ -413,8 +379,8 @@ impl FederatedFacets { pub fn merge( self, - MergeFacets { sort_facet_values_by, max_values_per_facet }: MergeFacets, - facet_order: Option>, + MergeFacets { max_values_per_facet }: MergeFacets, + facet_order: BTreeMap, ) -> Option { if self.is_empty() { return None; @@ -461,12 +427,7 @@ impl FederatedFacets { // fixup order for (facet, values) in &mut distribution { - let order_by = Option::::from(sort_facet_values_by) - .or_else(|| match &facet_order { - Some(facet_order) => facet_order.get(facet).map(|(_, order)| *order), - None => None, - }) - .unwrap_or_default(); + let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); match order_by { OrderBy::Lexicographic => { @@ -535,8 +496,8 @@ pub fn perform_federated_search( // 2. perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; - let (override_sort_facet_values_by, override_max_values_per_facet) = - MergeFacets::to_components(federation.merge_facets); + let override_max_values_per_facet = + federation.merge_facets.and_then(|merge_facets| merge_facets.max_values_per_facet); // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search @@ -548,9 +509,7 @@ pub fn perform_federated_search( // to detect if the order is inconsistent for a facet. let mut facet_order: Option> = match federation.merge_facets { - Some(MergeFacets { sort_facet_values_by: SortFacetValuesBy::IndexSettings, .. 
}) => { - Some(Default::default()) - } + Some(MergeFacets { .. }) => Some(Default::default()), _ => None, }; @@ -786,7 +745,7 @@ pub fn perform_federated_search( &rtxn, candidates, override_max_values_per_facet, - override_sort_facet_values_by, + None, super::Route::MultiSearch, ) }) @@ -850,7 +809,7 @@ pub fn perform_federated_search( &rtxn, Default::default(), override_max_values_per_facet, - override_sort_facet_values_by, + None, super::Route::MultiSearch, ) { error.message = @@ -905,17 +864,19 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); - let (facet_distribution, facet_stats, facets_by_index) = match federation.merge_facets { - Some(merge_facets) => { - let facets = facets.merge(merge_facets, facet_order); + let (facet_distribution, facet_stats, facets_by_index) = + match federation.merge_facets.zip(facet_order) { + Some((merge_facets, facet_order)) => { + let facets = facets.merge(merge_facets, facet_order); - let (facet_distribution, facet_stats) = - facets.map(|ComputedFacets { distribution, stats }| (distribution, stats)).unzip(); + let (facet_distribution, facet_stats) = facets + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); - (facet_distribution, facet_stats, FederatedFacets::default()) - } - None => (None, None, facets), - }; + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; let search_result = FederatedSearchResult { hits: merged_hits, From c42746c4cd79349122d29095d5df0d378966aa59 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:22:14 +0200 Subject: [PATCH 28/60] Update tests --- meilisearch/tests/search/multi.rs | 512 +----------------------------- 1 file changed, 4 insertions(+), 508 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 662d10a4c..7cf4bd415 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -5804,356 +5804,15 @@ 
async fn federation_inconsistent_merge_order() { } "###); - // works again with merging and forcing an order - let (response, code) = server -.multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "count" - } -}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, -]})) -.await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - 
"indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "red": 6, - "blue": 6, - "yellow": 4, - "green": 4 - }, - "title": { - "Shazam!": 2, - "How to Train Your Dragon: The Hidden World": 2, - "Gläss": 2, - "Escape Room": 2, - "Captain Marvel": 2, - "Batman the dark knight returns: Part 2": 1, - "Batman the dark knight returns: Part 1": 1, - "Batman Returns": 1, - "Batman": 1, - "Badman": 1 - } - }, - "facetStats": {} - } - "###); - - // works also with the other order - let (response, code) = server - .multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "alpha" - } - }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": 
["title:asc"], "attributesToRetrieve": ["title"] }, - ]})) - .await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - 
"indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "blue": 6, - "green": 4, - "red": 6, - "yellow": 4 - }, - "title": { - "Badman": 1, - "Batman": 1, - "Batman Returns": 1, - "Batman the dark knight returns: Part 1": 1, - "Batman the dark knight returns: Part 2": 1, - "Captain Marvel": 2, - "Escape Room": 2, - "Gläss": 2, - "How to Train Your Dragon: The Hidden World": 2, - "Shazam!": 2 - } - }, - "facetStats": {} - } - "###); - // can limit the number of values let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { "movies": ["title", "color"], "batman": ["title"], - "movies-2": ["title", "color"], + "movies-2": ["title"], }, "mergeFacets": { - "sortFacetValuesBy": "count", "maxValuesPerFacet": 3, } }, "queries": [ @@ -6293,172 +5952,9 @@ async fn federation_inconsistent_merge_order() { "estimatedTotalHits": 15, "facetDistribution": { "color": { - "red": 6, - "blue": 6, - "yellow": 4 - }, - "title": { - "Shazam!": 2, - "How to Train Your Dragon: The Hidden World": 2, - "Gläss": 2 - } - }, - "facetStats": {} - } - "###); - - // can limit the number of values by alpha - let (response, code) = server - .multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "alpha", - "maxValuesPerFacet": 3, - } - }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], 
"attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - ]})) - .await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - 
"title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "blue": 6, - "green": 4, - "red": 6 + "blue": 3, + "green": 2, + "red": 3 }, "title": { "Badman": 1, From af8edab21df07dcd01b055174105febe36a0f92e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:39:51 +0200 Subject: [PATCH 29/60] Remove mention of sort order and recommend changing index settings on inconsistent order error --- meilisearch/src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 9d5eff016..b3a94e60d 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -34,7 +34,7 @@ pub enum MeilisearchHttpError { PaginationInFederatedQuery(usize, &'static str), #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] FacetsInFederatedQuery(usize, String, Vec), - #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders 
{index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")] InconsistentFacetOrder { facet: String, previous_facet_order: OrderBy, From df648ce7a63fa59c0d72d1d010a477dda585a301 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:40:14 +0200 Subject: [PATCH 30/60] Update tests --- meilisearch/tests/search/multi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 7cf4bd415..1a2ca4c84 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From 5de4b48552827616e5a27f93ba8d7bcfa09b0d60 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:49:00 +0200 Subject: [PATCH 31/60] Fixup error messages --- meilisearch/src/search/federated.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 804d56689..7efbea20b 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -558,7 +558,7 @@ pub fn perform_federated_search( error.message = format!( "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", if let Some(query_index) = first_query_index { - format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") } else { Default::default() } @@ -755,7 +755,7 @@ pub fn perform_federated_search( "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", error.message, if let Some(query_index) = first_query_index { - format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") } else { Default::default() } @@ -783,7 +783,7 @@ pub fn perform_federated_search( // here the resource not found is not part of the URL. 
err.code = StatusCode::BAD_REQUEST; err.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", err.message ); return Err(err); @@ -797,7 +797,7 @@ pub fn perform_federated_search( check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) { error.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n Note: index `{index_uid}` is not used in queries", + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", ); return Err(error); } @@ -813,7 +813,7 @@ pub fn perform_federated_search( super::Route::MultiSearch, ) { error.message = - format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); return Err(error); } } From 52a52f97cf1a46b66302323ab646fb1088036bd7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:49:12 +0200 Subject: [PATCH 32/60] Update tests --- meilisearch/tests/search/multi.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 1a2ca4c84..b9593f05f 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3988,7 +3988,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4010,7 +4010,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4033,7 +4033,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n - Note: index `fruits-no-facets` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4055,7 +4055,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n Note: index `zorglub` is not used in queries", + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n - Note: index `zorglub` is not used in queries", "code": "index_not_found", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_not_found" @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove 
`federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From 174d69ff727c7213d037e5381fe2dd7b077e3de9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 18:16:14 +0200 Subject: [PATCH 33/60] Don't override max value in indexes --- meilisearch/src/search/federated.rs | 7 ------ meilisearch/src/search/mod.rs | 35 ++++++++--------------------- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 7efbea20b..94a25a0c9 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -496,9 +496,6 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; - let override_max_values_per_facet = - federation.merge_facets.and_then(|merge_facets| merge_facets.max_values_per_facet); - // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; @@ -744,8 +741,6 @@ pub fn perform_federated_search( &index, &rtxn, candidates, - override_max_values_per_facet, - None, super::Route::MultiSearch, ) }) @@ -808,8 +803,6 @@ pub fn perform_federated_search( &index, &rtxn, Default::default(), - override_max_values_per_facet, - None, super::Route::MultiSearch, ) { error.message = diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 4d5d8d890..5bba40a07 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -990,15 +990,7 @@ pub fn perform_search( let (facet_distribution, facet_stats) = facets .map(move |facets| { - compute_facet_distribution_stats( - &facets, - index, - &rtxn, - candidates, - None, - None, - Route::Search, - ) + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search) }) .transpose()? .map(|ComputedFacets { distribution, stats }| (distribution, stats)) @@ -1034,39 +1026,30 @@ fn compute_facet_distribution_stats>( index: &Index, rtxn: &RoTxn, candidates: roaring::RoaringBitmap, - override_max_values_per_facet: Option, - override_sort_facet_values_by: Option, route: Route, ) -> Result { let mut facet_distribution = index.facets_distribution(rtxn); - let max_values_by_facet = match override_max_values_per_facet { - Some(max_values_by_facet) => max_values_by_facet, - None => index - .max_values_per_facet(rtxn) - .map_err(milli::Error::from)? 
- .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET), - }; + let max_values_by_facet = index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET); facet_distribution.max_values_per_facet(max_values_by_facet); let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; - let sort_facet_values_by = |n: &str| match override_sort_facet_values_by { - Some(order_by) => order_by, - None => sort_facet_values_by.get(n), - }; - // add specific facet if there is no placeholder if facets.iter().all(|f| f.as_ref() != "*") { - let fields: Vec<_> = facets.iter().map(|n| (n, sort_facet_values_by(n.as_ref()))).collect(); + let fields: Vec<_> = + facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect(); facet_distribution.facets(fields); } let distribution = facet_distribution .candidates(candidates) - .default_order_by(sort_facet_values_by("*")) + .default_order_by(sort_facet_values_by.get("*")) .execute() .map_err(|error| match (error, route) { ( From c2caff1716a84a93c5652180ed2ae95c325acd76 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 18 Sep 2024 11:26:43 +0200 Subject: [PATCH 34/60] Remove obsolete enum --- meilisearch/src/search/federated.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 94a25a0c9..170da4112 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -90,14 +90,6 @@ pub struct MergeFacets { pub max_values_per_facet: Option, } -#[derive(Debug, deserr::Deserr, Default)] -#[deserr(rename_all = camelCase, deny_unknown_fields)] -pub enum GroupFacetsBy { - Facet, - #[default] - Index, -} - #[derive(Debug, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct FederatedSearch { From 50981ea778561bc0d20ae0d6b86031bc5f127b1b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 18 Sep 2024 11:44:29 +0200 Subject: [PATCH 35/60] Update the error messages --- meilisearch/tests/search/errors.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 0086c6af9..6840f8fba 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" From 00f8d03f4349888b654456aa2cc2683aefffaece Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 18 Sep 2024 11:43:07 +0200 Subject: [PATCH 36/60] Use f64::min and f64::max --- meilisearch/src/search/federated.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 170da4112..5279c26bb 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -408,10 +408,8 @@ impl FederatedFacets { std::collections::btree_map::Entry::Occupied(mut entry) => { let stats = entry.get_mut(); - stats.min = - if stats.min <= index_stats.min { stats.min } else { index_stats.min }; - stats.max = - if stats.max >= index_stats.max { stats.max } else { index_stats.max }; + stats.min = f64::min(stats.min, index_stats.min); + stats.max = f64::max(stats.max, index_stats.max); } } } From 716817122a9949c2e45631d3076e8afb34f6a949 Mon Sep 17 00:00:00 2001 From: Ian Ornstein Date: Wed, 18 Sep 2024 16:30:29 -0500 Subject: [PATCH 37/60] Correct broken links in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e60d09b13..59d618ab2 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co ## ✨ Features - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience -- **[Typo
tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings +- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language 
support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling -- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants +- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs - **Easy to install, deploy, and maintain** From 877717cb2675e154eaa98d947651c2e2405c485a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 08:34:04 +0200 Subject: [PATCH 38/60] Add a test using Swedish documents --- meilisearch/tests/search/locales.rs | 122 ++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 4724f975d..53bcece06 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1143,3 +1143,125 @@ async fn 
facet_search_with_localized_attributes() { } "###); } +#[actix_rt::test] +async fn swedish_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": "tra1-1", "product": "trä"}, + {"id": "tra2-1", "product": "traktor"}, + {"id": "tra1-2", "product": "träbjälke"}, + {"id": "tra2-2", "product": "trafiksignal"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["swe"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + index + .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + // force swedish + index + .search( + json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + index + .search( + json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + 
snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} From bbaee3dbc63640984051e3eb37e7fc0e57dd873e Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 08:34:51 +0200 Subject: [PATCH 39/60] Add Swedish pipeline in all-tokenization feature --- milli/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..8a5ba366f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -106,6 +106,7 @@ all-tokenizations = [ "charabia/greek", "charabia/khmer", "charabia/vietnamese", + "charabia/swedish-recomposition", ] # Use POSIX semaphores instead of SysV semaphores in LMDB From cc45e264ca6a1eae09cc6370b54b4dc73a1f6ff7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 18 Sep 2024 18:13:37 +0200 Subject: [PATCH 40/60] implement the binary quantization in meilisearch --- Cargo.lock | 30 ++- index-scheduler/src/lib.rs | 11 +- meilisearch-types/src/error.rs | 5 +- meilisearch/src/routes/indexes/similar.rs | 5 +- meilisearch/src/search/mod.rs | 50 +++-- milli/Cargo.toml | 3 +- milli/src/error.rs | 4 + milli/src/index.rs | 55 ++--- milli/src/search/hybrid.rs | 4 +- milli/src/search/mod.rs | 7 +- milli/src/search/new/mod.rs | 4 + milli/src/search/new/vector_sort.rs | 10 +- milli/src/search/similar.rs | 9 +- .../extract/extract_vector_points.rs | 98 ++++----- milli/src/update/index_documents/mod.rs | 37 +++- milli/src/update/index_documents/transform.rs | 35 +--- .../src/update/index_documents/typed_chunk.rs | 26 ++- milli/src/update/settings.rs | 101 +++++---- milli/src/vector/mod.rs | 192 +++++++++++++++++- milli/src/vector/settings.rs | 96 +++++++-- 20 files changed, 559 insertions(+), 223 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1af89d382..485ab1305 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -384,6 +384,24 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +[[package]] +name = "arroy" +version = "0.4.0" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2", + "nohash", + "ordered-float", + "rand", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "arroy" version = "0.4.0" @@ -2555,7 +2573,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy", + "arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bincode", "crossbeam", @@ -2838,7 +2856,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] @@ -3545,7 +3563,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ - "arroy", + "arroy 0.4.0", "big_s", "bimap", "bincode", @@ -3686,6 +3704,12 @@ version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +[[package]] +name = "nohash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca" + [[package]] name = "nom" version = "7.1.3" diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 753e8c179..2126b0b94 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1477,7 +1477,7 @@ impl IndexScheduler { .map( |IndexEmbeddingConfig { name, - config: milli::vector::EmbeddingConfig { embedder_options, prompt }, + config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized }, .. 
}| { let prompt = @@ -1486,7 +1486,10 @@ impl IndexScheduler { { let embedders = self.embedders.read().unwrap(); if let Some(embedder) = embedders.get(&embedder_options) { - return Ok((name, (embedder.clone(), prompt))); + return Ok(( + name, + (embedder.clone(), prompt, quantized.unwrap_or_default()), + )); } } @@ -1500,7 +1503,7 @@ impl IndexScheduler { let mut embedders = self.embedders.write().unwrap(); embedders.insert(embedder_options, embedder.clone()); } - Ok((name, (embedder, prompt))) + Ok((name, (embedder, prompt, quantized.unwrap_or_default()))) }, ) .collect(); @@ -5197,7 +5200,7 @@ mod tests { let simple_hf_name = name.clone(); let configs = index_scheduler.embedders(configs).unwrap(); - let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 535bf2dd6..f755998a1 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -395,7 +395,10 @@ impl ErrorCode for milli::Error { | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } - | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, + | UserError::InvalidPrompt(_) + | UserError::InvalidDisableBinaryQuantization { .. } => { + Code::InvalidSettingsEmbedders + } UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) 
=> Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index dd30c793e..210a52b75 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -102,8 +102,8 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; - let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; + let (embedder_name, embedder, quantized) = + SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -111,6 +111,7 @@ async fn similar( query, embedder_name, embedder, + quantized, retrieve_vectors, index_scheduler.features(), ) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 9abfec3e3..66b6e56de 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -274,8 +274,8 @@ pub struct HybridQuery { #[derive(Clone)] pub enum SearchKind { KeywordOnly, - SemanticOnly { embedder_name: String, embedder: Arc }, - Hybrid { embedder_name: String, embedder: Arc, semantic_ratio: f32 }, + SemanticOnly { embedder_name: String, embedder: Arc, quantized: bool }, + Hybrid { embedder_name: String, embedder: Arc, quantized: bool, semantic_ratio: f32 }, } impl SearchKind { @@ -285,9 +285,9 @@ impl SearchKind { embedder_name: &str, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::SemanticOnly { embedder_name, embedder }) + Ok(Self::SemanticOnly { embedder_name, embedder, quantized }) } pub(crate) fn hybrid( @@ -297,9 +297,9 @@ impl SearchKind { semantic_ratio: f32, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = 
Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio }) + Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio }) } pub(crate) fn embedder( @@ -307,16 +307,14 @@ impl SearchKind { index: &Index, embedder_name: &str, vector_len: Option, - ) -> Result<(String, Arc), ResponseError> { + ) -> Result<(String, Arc, bool), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder = embedders.get(embedder_name); - - let embedder = embedder + let (embedder, _, quantized) = embedders + .get(embedder_name) .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned())) - .map_err(milli::Error::from)? - .0; + .map_err(milli::Error::from)?; if let Some(vector_len) = vector_len { if vector_len != embedder.dimensions() { @@ -330,7 +328,7 @@ impl SearchKind { } } - Ok((embedder_name.to_owned(), embedder)) + Ok((embedder_name.to_owned(), embedder, quantized)) } } @@ -791,7 +789,7 @@ fn prepare_search<'t>( search.query(q); } } - SearchKind::SemanticOnly { embedder_name, embedder } => { + SearchKind::SemanticOnly { embedder_name, embedder, quantized } => { let vector = match query.vector.clone() { Some(vector) => vector, None => { @@ -805,14 +803,19 @@ fn prepare_search<'t>( } }; - search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); + search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector)); } - SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => { + SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => { if let Some(q) = &query.q { search.query(q); } // will be embedded in hybrid search if necessary - search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone()); + search.semantic( + embedder_name.clone(), + embedder.clone(), + *quantized, + query.vector.clone(), + ); } } @@ -1441,6 
+1444,7 @@ pub fn perform_similar( query: SimilarQuery, embedder_name: String, embedder: Arc, + quantized: bool, retrieve_vectors: RetrieveVectors, features: RoFeatures, ) -> Result { @@ -1469,8 +1473,16 @@ pub fn perform_similar( )); }; - let mut similar = - milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); + let mut similar = milli::Similar::new( + internal_id, + offset, + limit, + index, + &rtxn, + embedder_name, + embedder, + quantized, + ); if let Some(ref filter) = query.filter { if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..4d82d0a03 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -80,7 +80,8 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -arroy = "0.4.0" +# arroy = "0.4.0" +arroy = { path = "../../arroy" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } diff --git a/milli/src/error.rs b/milli/src/error.rs index f0e92a9ab..f09f48c2e 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, + #[error( + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization" + )] + InvalidDisableBinaryQuantization { embedder_name: String }, #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] diff --git a/milli/src/index.rs b/milli/src/index.rs 
index 512e911aa..63da889c4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{Embedding, EmbeddingConfig}; +use crate::vector::{ArroyReader, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -162,7 +162,7 @@ pub struct Index { /// Maps an embedder name to its id in the arroy store. pub embedder_category_id: Database, /// Vector store based on arroy™. - pub vector_arroy: arroy::Database, + pub vector_arroy: arroy::Database, /// Maps the document id to the document as an obkv store. pub(crate) documents: Database, @@ -1612,18 +1612,11 @@ impl Index { pub fn arroy_readers<'a>( &'a self, - rtxn: &'a RoTxn<'a>, embedder_id: u8, - ) -> impl Iterator>> + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - arroy::Reader::open(rtxn, k, self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e.into()), - }) - .transpose() - }) + quantized: bool, + ) -> impl Iterator + 'a { + crate::vector::arroy_db_range_for_embedder(embedder_id) + .map_while(move |k| Some(ArroyReader::new(self.vector_arroy, k, quantized))) } pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { @@ -1644,32 +1637,28 @@ impl Index { docid: DocumentId, ) -> Result>> { let mut res = BTreeMap::new(); - for row in self.embedder_category_id.iter(rtxn)? 
{ - let (embedder_name, embedder_id) = row?; + let embedding_configs = self.embedding_configs(rtxn)?; + for config in embedding_configs { + // TODO: return internal error instead + let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); let embedder_id = (embedder_id as u16) << 8; + let mut embeddings = Vec::new(); 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; + let reader = ArroyReader::new( + self.vector_arroy, + embedder_id | (i as u16), + config.config.quantized(), + ); + match reader.item_vector(rtxn, docid) { + Err(arroy::Error::MissingMetadata(_)) => break 'vectors, + Err(err) => return Err(err.into()), + Ok(None) => break 'vectors, + Ok(Some(embedding)) => embeddings.push(embedding), }; - - let embedding = reader?.item_vector(rtxn, docid)?; - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } } - res.insert(embedder_name.to_owned(), embeddings); + res.insert(config.name.to_owned(), embeddings); } Ok(res) } diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index e08111473..8b274804c 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -190,7 +190,7 @@ impl<'a> Search<'a> { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; // no embedder, no semantic search - let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { + let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; @@ -212,7 +212,7 @@ impl<'a> Search<'a> { }; search.semantic = - Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder }); + Some(SemanticSearch { 
vector: Some(vector_query), embedder_name, embedder, quantized }); // TODO: would be better to have two distinct functions at this point let vector_results = search.execute()?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3057066d2..d5b05f515 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -32,6 +32,7 @@ pub struct SemanticSearch { vector: Option>, embedder_name: String, embedder: Arc, + quantized: bool, } pub struct Search<'a> { @@ -89,9 +90,10 @@ impl<'a> Search<'a> { &mut self, embedder_name: String, embedder: Arc, + quantized: bool, vector: Option>, ) -> &mut Search<'a> { - self.semantic = Some(SemanticSearch { embedder_name, embedder, vector }); + self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector }); self } @@ -206,7 +208,7 @@ impl<'a> Search<'a> { degraded, used_negative_operator, } = match self.semantic.as_ref() { - Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => { + Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => { execute_vector_search( &mut ctx, vector, @@ -219,6 +221,7 @@ impl<'a> Search<'a> { self.limit, embedder_name, embedder, + *quantized, self.time_budget.clone(), self.ranking_score_threshold, )? 
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index b30306a0b..4babc7acc 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -320,6 +320,7 @@ fn get_ranking_rules_for_vector<'ctx>( target: &[f32], embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result>> { // query graph search @@ -347,6 +348,7 @@ fn get_ranking_rules_for_vector<'ctx>( limit_plus_offset, embedder_name, embedder, + quantized, )?; ranking_rules.push(Box::new(vector_sort)); vector = true; @@ -576,6 +578,7 @@ pub fn execute_vector_search( length: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, time_budget: TimeBudget, ranking_score_threshold: Option, ) -> Result { @@ -591,6 +594,7 @@ pub fn execute_vector_search( vector, embedder_name, embedder, + quantized, )?; let mut placeholder_search_logger = logger::DefaultSearchLogger; diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index e56f3cbbe..653aae7f1 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -16,6 +16,7 @@ pub struct VectorSort { limit: usize, distribution_shift: Option, embedder_index: u8, + quantized: bool, } impl VectorSort { @@ -26,6 +27,7 @@ impl VectorSort { limit: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result { let embedder_index = ctx .index @@ -41,6 +43,7 @@ impl VectorSort { limit, distribution_shift: embedder.distribution(), embedder_index, + quantized, }) } @@ -49,16 +52,15 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: std::result::Result, _> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect(); - let readers = readers?; + let readers: Vec<_> = + ctx.index.arroy_readers(self.embedder_index, self.quantized).collect(); let target = &self.target; let mut results = Vec::new(); for reader in readers.iter() { let nns_by_vector = - 
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; + reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index bf5cc323f..de329c9c3 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -18,6 +18,7 @@ pub struct Similar<'a> { embedder_name: String, embedder: Arc, ranking_score_threshold: Option, + quantized: bool, } impl<'a> Similar<'a> { @@ -29,6 +30,7 @@ impl<'a> Similar<'a> { rtxn: &'a heed::RoTxn<'a>, embedder_name: String, embedder: Arc, + quantized: bool, ) -> Self { Self { id, @@ -40,6 +42,7 @@ impl<'a> Similar<'a> { embedder_name, embedder, ranking_score_threshold: None, + quantized, } } @@ -67,10 +70,7 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: std::result::Result, _> = - self.index.arroy_readers(self.rtxn, embedder_index).collect(); - - let readers = readers?; + let readers: Vec<_> = self.index.arroy_readers(embedder_index, self.quantized).collect(); let mut results = Vec::new(); @@ -79,7 +79,6 @@ impl<'a> Similar<'a> { self.rtxn, self.id, self.limit + self.offset + 1, - None, Some(&universe), )?; if let Some(mut nns_by_item) = nns_by_item { diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index e9b83b92c..38a4ebe8a 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, 
UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; -use crate::vector::settings::{EmbedderAction, ReindexAction}; +use crate::vector::settings::ReindexAction; use crate::vector::{Embedder, Embeddings}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; @@ -208,65 +208,65 @@ pub fn extract_vector_points( if reindex_vectors { for (name, action) in settings_diff.embedding_config_updates.iter() { - match action { - EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted - EmbedderAction::Reindex(action) => { - let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name) - else { - tracing::error!(embedder = name, "Requested embedder config not found"); - continue; - }; + if let Some(action) = action.reindex() { + let Some((embedder_name, (embedder, prompt, _quantized))) = + configs.remove_entry(name) + else { + tracing::error!(embedder = name, "Requested embedder config not found"); + continue; + }; - // (docid, _index) -> KvWriterDelAdd -> Vector - let manual_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid, _index) -> KvWriterDelAdd -> Vector + let manual_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> (prompt) - let prompts_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> (prompt) + let prompts_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> () - let remove_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> () + let remove_vectors_writer = create_writer( + 
indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - let action = match action { - ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, - ReindexAction::RegeneratePrompts => { - let Some((_, old_prompt)) = old_configs.get(name) else { - tracing::error!(embedder = name, "Old embedder config not found"); - continue; - }; + let action = match action { + ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, + ReindexAction::RegeneratePrompts => { + let Some((_, old_prompt, _quantized)) = old_configs.get(name) else { + tracing::error!(embedder = name, "Old embedder config not found"); + continue; + }; - ExtractionAction::SettingsRegeneratePrompts { old_prompt } - } - }; + ExtractionAction::SettingsRegeneratePrompts { old_prompt } + } + }; - extractors.push(EmbedderVectorExtractor { - embedder_name, - embedder, - prompt, - prompts_writer, - remove_vectors_writer, - manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), - action, - }); - } + extractors.push(EmbedderVectorExtractor { + embedder_name, + embedder, + prompt, + prompts_writer, + remove_vectors_writer, + manual_vectors_writer, + add_to_user_provided: RoaringBitmap::new(), + action, + }); + } else { + continue; } } } else { // document operation - for (embedder_name, (embedder, prompt)) in configs.into_iter() { + for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() { // (docid, _index) -> KvWriterDelAdd -> Vector let manual_vectors_writer = create_writer( indexer.chunk_compression_type, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 6d659a7a2..29530a0bb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; 
-use crate::vector::EmbeddingConfigs; +use crate::vector::{ArroyReader, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -679,6 +679,24 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; let mut rng = rand::rngs::StdRng::seed_from_u64(42); + // If an embedder wasn't used in the typedchunk but must be binary quantized + // we should insert it in `dimension` + for (name, action) in settings_diff.embedding_config_updates.iter() { + if action.is_being_quantized && !dimension.contains_key(name.as_str()) { + let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?; + let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); + let reader = + ArroyReader::new(self.index.vector_arroy, first_id, action.was_quantized); + let dim = reader.dimensions(self.wtxn)?; + dimension.insert(name.to_string(), dim); + } + } + for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; @@ -686,13 +704,19 @@ where let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); + let was_quantized = embedder_config.map_or(false, |action| action.was_quantized); + let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let writer = arroy::Writer::new(vector_arroy, k, dimension); - if writer.need_build(wtxn)? { - writer.build(wtxn, &mut rng, None)?; - } else if writer.is_empty(wtxn)? 
{ + let mut writer = ArroyReader::new(vector_arroy, k, was_quantized); + if is_quantizing { + writer.quantize(wtxn, k, dimension)?; + } + if writer.need_build(wtxn, dimension)? { + writer.build(wtxn, &mut rng, dimension)?; + } else if writer.is_empty(wtxn, dimension)? { break; } } @@ -2746,6 +2770,7 @@ mod tests { response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + binary_quantized: Setting::NotSet, }), ); settings.set_embedder_settings(embedders); @@ -2774,7 +2799,7 @@ mod tests { std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); let res = index .search(&rtxn) - .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) + .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) .execute() .unwrap(); assert_eq!(res.documents_ids.len(), 3); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 73fa3ca7b..2467c0019 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; +use crate::vector::settings::WriteBackToDocuments; +use crate::vector::ArroyReader; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -989,23 +990,16 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result< - BTreeMap<&str, (Vec>, &RoaringBitmap)>, - > = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { - if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - 
embedder_id, - user_provided, - }) = action + if let Some(WriteBackToDocuments { embedder_id, user_provided }) = + action.write_back() { - let readers: Result> = - self.index.arroy_readers(wtxn, *embedder_id).collect(); - match readers { - Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), - Err(error) => Some(Err(error)), - } + let readers: Vec<_> = + self.index.arroy_readers(*embedder_id, action.was_quantized).collect(); + Some(Ok((name.as_str(), (readers, user_provided)))) } else { None } @@ -1104,23 +1098,14 @@ impl<'a, 'i> Transform<'a, 'i> { } } - let mut writers = Vec::new(); - // delete all vectors from the embedders that need removal for (_, (readers, _)) in readers { for reader in readers { - let dimensions = reader.dimensions(); - let arroy_index = reader.index(); - drop(reader); - let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions); - writers.push(writer); + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } } - for writer in writers { - writer.clear(wtxn)?; - } - let grenad_params = GrenadParameters { chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_level: self.indexer_settings.chunk_compression_level, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 9de95778b..b133f7a87 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::ArroyReader; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -666,9 +667,13 @@ pub(crate) fn write_typed_chunk_into_index( let embedder_index = index.embedder_category_id.get(wtxn, 
&embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let binary_quantized = settings_diff + .embedding_config_updates + .get(&embedder_name) + .map_or(false, |conf| conf.was_quantized); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) + .map(|k| ArroyReader::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed @@ -679,7 +684,7 @@ pub(crate) fn write_typed_chunk_into_index( for writer in &writers { // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, docid)? { + if !writer.del_item(wtxn, expected_dimension, docid)? { break; } } @@ -711,7 +716,7 @@ pub(crate) fn write_typed_chunk_into_index( ))); } for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, docid, embedding)?; + writer.add_item(wtxn, expected_dimension, docid, embedding)?; } } @@ -734,7 +739,7 @@ pub(crate) fn write_typed_chunk_into_index( break; }; if candidate == vector { - writer.del_item(wtxn, docid)?; + writer.del_item(wtxn, expected_dimension, docid)?; deleted_index = Some(index); } } @@ -751,8 +756,13 @@ pub(crate) fn write_typed_chunk_into_index( if let Some((last_index, vector)) = last_index_with_a_vector { // unwrap: computed the index from the list of writers let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, docid)?; - writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?; + writer.del_item(wtxn, expected_dimension, docid)?; + writers.get(deleted_index).unwrap().add_item( + wtxn, + expected_dimension, + docid, + &vector, + )?; } } } @@ -762,8 +772,8 @@ pub(crate) fn write_typed_chunk_into_index( // overflow was detected during vector extraction. for writer in &writers { - if !writer.contains_item(wtxn, docid)? 
{ - writer.add_item(wtxn, docid, &vector)?; + if !writer.contains_item(wtxn, expected_dimension, docid)? { + writer.add_item(wtxn, expected_dimension, docid, &vector)?; break; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 8702e7ea6..40aa22a81 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -425,11 +425,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { + println!("inside reindex"); // if the settings are set before any document update, we don't need to do anything, and // will set the primary key during the first document addition. if self.index.number_of_documents(self.wtxn)? == 0 { return Ok(()); } + println!("didnt early exit"); let transform = Transform::new( self.wtxn, @@ -954,7 +956,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let old_configs = self.index.embedding_configs(self.wtxn)?; let remove_all: Result> = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { + .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> { let embedder_id = self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( crate::InternalError::DatabaseMissingEntry { @@ -964,10 +966,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { )?; Ok(( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + config.quantized(), + ), )) }) .collect(); @@ -1004,7 +1006,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { match joined { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { - let settings_diff = SettingsDiff::from_settings(old, new); + let was_quantized = old.binary_quantized.set().unwrap_or_default(); + let settings_diff = SettingsDiff::from_settings(old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( @@ 
-1023,25 +1026,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.embedder_category_id.delete(self.wtxn, &name)?; embedder_actions.insert( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + was_quantized, + ), ); } - SettingsDiff::Reindex { action, updated_settings } => { + SettingsDiff::Reindex { action, updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), ?action, "reindex embedder" ); - embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(action, was_quantized) + .with_is_being_quantized(quantize), + ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; updated_configs.insert(name, (new, user_provided)); } - SettingsDiff::UpdateWithoutReindex { updated_settings } => { + SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), @@ -1049,6 +1056,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; + if quantize { + embedder_actions.insert( + name.clone(), + EmbedderAction::default().with_is_being_quantized(true), + ); + } updated_configs.insert(name, (new, user_provided)); } } @@ -1067,8 +1080,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { &mut setting, ); let setting = validate_embedding_settings(setting, &name)?; - embedder_actions - .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(ReindexAction::FullReindex, false), + ); updated_configs.insert(name, (setting, RoaringBitmap::new())); } } @@ -1082,19 +1097,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || 
free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { - match action { - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { - /* cannot be a new embedder, so has to have an id already */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { - if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { - let id = find_free_index() - .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; - tracing::debug!(embedder = name, id, "assigning free id to new embedder"); - self.index.embedder_category_id.put(self.wtxn, name, &id)?; - } - } - EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ } + if matches!(action.reindex(), Some(ReindexAction::FullReindex)) + && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() + { + let id = + find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; + tracing::debug!(embedder = name, id, "assigning free id to new embedder"); + self.index.embedder_category_id.put(self.wtxn, name, &id)?; } } let updated_configs: Vec = updated_configs @@ -1277,7 +1286,11 @@ impl InnerIndexSettingsDiff { // if the user-defined searchables changed, then we need to reindex prompts. 
if cache_user_defined_searchables { - for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() { + for (embedder_name, (config, _, _quantized)) in + new_settings.embedding_configs.inner_as_ref() + { + let was_quantized = + old_settings.embedding_configs.get(&embedder_name).map_or(false, |conf| conf.2); // skip embedders that don't use document templates if !config.uses_document_template() { continue; @@ -1287,16 +1300,19 @@ impl InnerIndexSettingsDiff { // this always makes the code clearer by explicitly handling the cases match embedding_config_updates.entry(embedder_name.clone()) { std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts)); + entry.insert(EmbedderAction::with_reindex( + ReindexAction::RegeneratePrompts, + was_quantized, + )); + } + std::collections::btree_map::Entry::Occupied(entry) => { + let EmbedderAction { + was_quantized: _, + is_being_quantized: _, // We are deleting this embedder, so no point in regeneration + write_back: _, // We are already fully reindexing + reindex: _, // We are already regenerating prompts + } = entry.get(); } - std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { - EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */ - } - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */ - } - }, }; } } @@ -1546,7 +1562,7 @@ fn embedders(embedding_configs: Vec) -> Result) -> Result { let max_bytes = match document_template_max_bytes.set() { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { @@ -1613,6 +1630,7 @@ fn validate_prompt( response, distribution, headers, + binary_quantized: binary_quantize, })) } new => Ok(new), @@ -1638,6 +1656,7 @@ pub fn validate_embedding_settings( response, 
distribution, headers, + binary_quantized: binary_quantize, } = settings; if let Some(0) = dimensions.set() { @@ -1678,6 +1697,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })); }; match inferred_source { @@ -1779,6 +1799,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })) } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 23417ced2..edda59121 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -1,8 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; +use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::ItemId; use deserr::{DeserializeError, Deserr}; +use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; +use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use self::error::{EmbedError, NewEmbedderError}; @@ -26,6 +30,171 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub struct ArroyReader { + quantized: bool, + index: u16, + database: arroy::Database, +} + +impl ArroyReader { + pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { + Self { database, index, quantized } + } + + pub fn index(&self) -> u16 { + self.index + } + + pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + if self.quantized { + Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + } else { + Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + } + } + + pub fn quantize( + &mut self, + wtxn: &mut RwTxn, + index: u16, + dimension: usize, + ) -> Result<(), arroy::Error> { + if !self.quantized { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + writer.prepare_changing_distance::(wtxn)?; + self.quantized = true; + } + Ok(()) + } + + pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + 
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) + } + } + + pub fn build( + &self, + wtxn: &mut RwTxn, + rng: &mut R, + dimension: usize, + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + } + } + + pub fn add_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } + } + + pub fn del_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + } + } + + pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) + } + } + + pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + } + } + + pub fn contains_item( + &self, + rtxn: &RoTxn, + dimension: usize, + item: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, 
item) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + } + } + + pub fn nns_by_item( + &self, + rtxn: &RoTxn, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } + } + + pub fn nns_by_vector( + &self, + txn: &RoTxn, + item: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + arroy::Reader::open(txn, self.index, self.quantized_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } else { + arroy::Reader::open(txn, self.index, self.angular_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } + } + + pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + } + } + + fn angular_db(&self) -> arroy::Database { + self.database.remap_data_type() + } + + fn quantized_db(&self) -> arroy::Database { + self.database.remap_data_type() + } +} + /// One or multiple embeddings stored consecutively in a flat vector. pub struct Embeddings { data: Vec, @@ -124,39 +293,48 @@ pub struct EmbeddingConfig { pub embedder_options: EmbedderOptions, /// Document template pub prompt: PromptData, + /// If this embedder is binary quantized + pub quantized: Option, // TODO: add metrics and anything needed } +impl EmbeddingConfig { + pub fn quantized(&self) -> bool { + self.quantized.unwrap_or_default() + } +} + /// Map of embedder configurations. /// /// Each configuration is mapped to a name. 
#[derive(Clone, Default)] -pub struct EmbeddingConfigs(HashMap, Arc)>); +pub struct EmbeddingConfigs(HashMap, Arc, bool)>); impl EmbeddingConfigs { /// Create the map from its internal component.s - pub fn new(data: HashMap, Arc)>) -> Self { + pub fn new(data: HashMap, Arc, bool)>) -> Self { Self(data) } /// Get an embedder configuration and template from its name. - pub fn get(&self, name: &str) -> Option<(Arc, Arc)> { + pub fn get(&self, name: &str) -> Option<(Arc, Arc, bool)> { self.0.get(name).cloned() } - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { + pub fn inner_as_ref(&self) -> &HashMap, Arc, bool)> { &self.0 } - pub fn into_inner(self) -> HashMap, Arc)> { + pub fn into_inner(self) -> HashMap, Arc, bool)> { self.0 } } impl IntoIterator for EmbeddingConfigs { - type Item = (String, (Arc, Arc)); + type Item = (String, (Arc, Arc, bool)); - type IntoIter = std::collections::hash_map::IntoIter, Arc)>; + type IntoIter = + std::collections::hash_map::IntoIter, Arc, bool)>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index b7ae90d89..9b2c1c6e3 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -32,6 +32,9 @@ pub struct EmbeddingSettings { pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] @@ -85,23 +88,62 @@ pub enum ReindexAction { pub enum SettingsDiff { Remove, - Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, - UpdateWithoutReindex { updated_settings: EmbeddingSettings }, + Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool }, + UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool 
}, } -pub enum EmbedderAction { - WriteBackToDocuments(WriteBackToDocuments), - Reindex(ReindexAction), +#[derive(Default, Debug)] +pub struct EmbedderAction { + pub was_quantized: bool, + pub is_being_quantized: bool, + pub write_back: Option, + pub reindex: Option, } +impl EmbedderAction { + pub fn is_being_quantized(&self) -> bool { + self.is_being_quantized + } + + pub fn write_back(&self) -> Option<&WriteBackToDocuments> { + self.write_back.as_ref() + } + + pub fn reindex(&self) -> Option<&ReindexAction> { + self.reindex.as_ref() + } + + pub fn with_is_being_quantized(mut self, quantize: bool) -> Self { + self.is_being_quantized = quantize; + self + } + + pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self { + Self { + was_quantized, + is_being_quantized: false, + write_back: Some(write_back), + reindex: None, + } + } + + pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self { + Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) } + } +} + +#[derive(Debug)] pub struct WriteBackToDocuments { pub embedder_id: u8, pub user_provided: RoaringBitmap, } impl SettingsDiff { - pub fn from_settings(old: EmbeddingSettings, new: Setting) -> Self { - match new { + pub fn from_settings( + old: EmbeddingSettings, + new: Setting, + ) -> Result { + let ret = match new { Setting::Set(new) => { let EmbeddingSettings { mut source, @@ -116,6 +158,7 @@ impl SettingsDiff { mut distribution, mut headers, mut document_template_max_bytes, + binary_quantized: mut binary_quantize, } = old; let EmbeddingSettings { @@ -131,8 +174,17 @@ impl SettingsDiff { distribution: new_distribution, headers: new_headers, document_template_max_bytes: new_document_template_max_bytes, + binary_quantized: new_binary_quantize, } = new; + if matches!(binary_quantize, Setting::Set(true)) + && matches!(new_binary_quantize, Setting::Set(false)) + { + return Err(UserError::InvalidDisableBinaryQuantization { + 
embedder_name: String::from("todo"), + }); + } + let mut reindex_action = None; // **Warning**: do not use short-circuiting || here, we want all these operations applied @@ -172,6 +224,7 @@ impl SettingsDiff { _ => {} } } + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); if url.apply(new_url) { match source { // do not regenerate on an url change in OpenAI @@ -231,16 +284,27 @@ impl SettingsDiff { distribution, headers, document_template_max_bytes, + binary_quantized: binary_quantize, }; match reindex_action { - Some(action) => Self::Reindex { action, updated_settings }, - None => Self::UpdateWithoutReindex { updated_settings }, + Some(action) => Self::Reindex { + action, + updated_settings, + quantize: binary_quantize_changed, + }, + None => Self::UpdateWithoutReindex { + updated_settings, + quantize: binary_quantize_changed, + }, } } Setting::Reset => Self::Remove, - Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, - } + Setting::NotSet => { + Self::UpdateWithoutReindex { updated_settings: old, quantize: false } + } + }; + Ok(ret) } } @@ -486,7 +550,7 @@ impl std::fmt::Display for EmbedderSource { impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { - let EmbeddingConfig { embedder_options, prompt } = value; + let EmbeddingConfig { embedder_options, prompt, quantized } = value; let document_template_max_bytes = Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { @@ -507,6 +571,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { url, @@ -527,6 +592,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: 
Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { embedding_model, @@ -547,6 +613,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { dimensions, @@ -564,6 +631,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { api_key, @@ -586,6 +654,7 @@ impl From for EmbeddingSettings { response: Setting::Set(response), distribution: Setting::some_or_not_set(distribution), headers: Setting::Set(headers), + binary_quantized: Setting::some_or_not_set(quantized), }, } } @@ -607,8 +676,11 @@ impl From for EmbeddingConfig { response, distribution, headers, + binary_quantized, } = value; + this.quantized = binary_quantized.set(); + if let Some(source) = source.set() { match source { EmbedderSource::OpenAi => { From 79f29eed3c6b9839b4a0ed462a1a35b3b1e8b395 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:32:17 +0200 Subject: [PATCH 41/60] fix the tests and the arroy_readers method --- index-scheduler/src/lib.rs | 27 ++++--------------- .../Intel to kefir succeeds.snap | 2 +- .../lib.rs/import_vectors/Intel to kefir.snap | 2 +- .../import_vectors/adding Intel succeeds.snap | 2 +- .../import_vectors/after adding Intel.snap | 2 +- ...ter_registering_settings_task_vectors.snap | 2 +- .../settings_update_processed_vectors.snap | 2 +- .../after_registering_settings_task.snap | 2 +- .../settings_update_processed.snap | 2 +- milli/src/index.rs | 15 ++++++++--- milli/src/search/new/vector_sort.rs | 6 ++--- milli/src/search/similar.rs | 4 ++- milli/src/update/index_documents/transform.rs | 11 
+++++--- 13 files changed, 39 insertions(+), 40 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 2126b0b94..fe8244f9b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -5522,6 +5522,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[1, 2]>, }, @@ -5534,28 +5535,8 @@ mod tests { // the document with the id 3 should keep its original embedding let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let mut embeddings = Vec::new(); - - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap(); - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; snapshot!(embeddings.len(), @"1"); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); @@ -5740,6 +5721,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[0]>, }, @@ -5783,6 +5765,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[]>, }, diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 5eccdc57a..41cfcfdab 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: 
succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, 
document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, 
headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index e7c7382d5..e6d0d8232 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), 
search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": 
Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, 
binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index ac3b3f2d9..bd4cf0c09 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: 
NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: 
NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index e67ef0e51..746c7c870 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, 
revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 
---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 84d8486e1..15cfd732a 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: 
PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, 
api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best 
doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 6ef17024d..9b5c6ce4c 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, 
document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, 
dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { 
source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index cf710b40f..37f0a062d 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: 
NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 9b5b465ab..3906fc6fc 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: 
NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/milli/src/index.rs b/milli/src/index.rs index 63da889c4..9b06e9645 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1612,11 +1612,20 @@ impl Index { pub fn arroy_readers<'a>( &'a self, + rtxn: &'a RoTxn<'a>, embedder_id: u8, quantized: bool, - ) -> 
impl Iterator + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id) - .map_while(move |k| Some(ArroyReader::new(self.vector_arroy, k, quantized))) + ) -> impl Iterator> + 'a { + crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { + let reader = ArroyReader::new(self.vector_arroy, k, quantized); + // Here we don't care about the dimensions, but we want to know if we can read + // in the database or if its medata are missing. + match reader.dimensions(rtxn) { + Ok(_) => Some(Ok(reader)), + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e.into())), + } + }) } pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 653aae7f1..47480c315 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -52,13 +52,13 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: Vec<_> = - ctx.index.arroy_readers(self.embedder_index, self.quantized).collect(); + let readers: Result> = + ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized).collect(); let target = &self.target; let mut results = Vec::new(); - for reader in readers.iter() { + for reader in readers?.iter() { let nns_by_vector = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index de329c9c3..ac56e10fa 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -70,7 +70,9 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? 
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: Vec<_> = self.index.arroy_readers(embedder_index, self.quantized).collect(); + let readers: Result> = + self.index.arroy_readers(self.rtxn, embedder_index, self.quantized).collect(); + let readers = readers?; let mut results = Vec::new(); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 2467c0019..b1a5e4b2d 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -997,9 +997,14 @@ impl<'a, 'i> Transform<'a, 'i> { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let readers: Vec<_> = - self.index.arroy_readers(*embedder_id, action.was_quantized).collect(); - Some(Ok((name.as_str(), (readers, user_provided)))) + let readers: Result> = self + .index + .arroy_readers(wtxn, *embedder_id, action.was_quantized) + .collect(); + match readers { + Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), + Err(error) => Some(Err(error)), + } } else { None } From 2b6952eda12d0d0a6f79e2c5765672efc9835c51 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:35:17 +0200 Subject: [PATCH 42/60] rename the ArroyReader to an ArroyWrapper since it can read and write --- milli/src/index.rs | 8 ++++---- milli/src/update/index_documents/mod.rs | 6 +++--- milli/src/update/index_documents/transform.rs | 4 ++-- milli/src/update/index_documents/typed_chunk.rs | 4 ++-- milli/src/vector/mod.rs | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 9b06e9645..2dd6c6541 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use 
crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{ArroyReader, Embedding, EmbeddingConfig}; +use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -1615,9 +1615,9 @@ impl Index { rtxn: &'a RoTxn<'a>, embedder_id: u8, quantized: bool, - ) -> impl Iterator> + 'a { + ) -> impl Iterator> + 'a { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - let reader = ArroyReader::new(self.vector_arroy, k, quantized); + let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); // Here we don't care about the dimensions, but we want to know if we can read // in the database or if its medata are missing. match reader.dimensions(rtxn) { @@ -1654,7 +1654,7 @@ impl Index { let mut embeddings = Vec::new(); 'vectors: for i in 0..=u8::MAX { - let reader = ArroyReader::new( + let reader = ArroyWrapper::new( self.vector_arroy, embedder_id | (i as u16), config.config.quantized(), diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 29530a0bb..d8566582c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; -use crate::vector::{ArroyReader, EmbeddingConfigs}; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -691,7 +691,7 @@ where )?; let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); let reader = - ArroyReader::new(self.index.vector_arroy, first_id, action.was_quantized); + 
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); let dim = reader.dimensions(self.wtxn)?; dimension.insert(name.to_string(), dim); } @@ -710,7 +710,7 @@ where pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let mut writer = ArroyReader::new(vector_arroy, k, was_quantized); + let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); if is_quantizing { writer.quantize(wtxn, k, dimension)?; } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index b1a5e4b2d..bb2cfe56c 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -29,7 +29,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::settings::WriteBackToDocuments; -use crate::vector::ArroyReader; +use crate::vector::ArroyWrapper; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -990,7 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result, &RoaringBitmap)>> = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index b133f7a87..90e49d23b 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; -use crate::vector::ArroyReader; +use crate::vector::ArroyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, 
GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -673,7 +673,7 @@ pub(crate) fn write_typed_chunk_into_index( .map_or(false, |conf| conf.was_quantized); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| ArroyReader::new(index.vector_arroy, k, binary_quantized)) + .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index edda59121..d52e68bbe 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -30,13 +30,13 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; -pub struct ArroyReader { +pub struct ArroyWrapper { quantized: bool, index: u16, database: arroy::Database, } -impl ArroyReader { +impl ArroyWrapper { pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { Self { database, index, quantized } } From ca71b63ed1e1dbe700d9ee393167e6eea3361914 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:55:20 +0200 Subject: [PATCH 43/60] adds integration tests --- meilisearch/tests/vector/binary_quantized.rs | 287 +++++++++++++++++++ meilisearch/tests/vector/mod.rs | 1 + 2 files changed, 288 insertions(+) create mode 100644 meilisearch/tests/vector/binary_quantized.rs diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs new file mode 100644 index 000000000..0f3819586 --- /dev/null +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -0,0 +1,287 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{GetAllDocumentsOptions, Server}; +use crate::json; +use crate::vector::generate_default_user_provided_documents; + +#[actix_rt::test] +async fn binary_quantize_before_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = 
server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", + "code": "internal", + "type": "internal", + "link": "https://docs.meilisearch.com/errors#internal" + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "message": "internal: Invalid distance provided. 
Got binary quantized angular but expected angular.", + "code": "internal", + "type": "internal", + "link": "https://docs.meilisearch.com/errors#internal" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_after_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 
0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn try_to_disable_binary_quantization() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let ret = server.wait_task(response.uid()).await; + snapshot!(ret, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false + } + } + }, + "error": { + "message": "`.embedders.todo.binaryQuantized`: Cannot disable the binary quantization", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_clear_documents() { + let server = Server::new().await; + let index = generate_default_user_provided_documents(&server).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, 
@"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (value, _code) = index.clear_all_documents().await; + index.wait_task(value.uid()).await; + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + "total": 0 + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "hits": [], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + "semanticHitCount": 0 + } + "###); +} diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 0e38c1366..47d0c1051 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod binary_quantized; mod openai; mod rest; mod settings; From 3f6301dbc953e8b976e1de0d08caf0ac397a7db4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:58:45 +0200 Subject: [PATCH 44/60] fix the missing embedder name in the error message when trying to disable the binary quantization --- meilisearch/tests/vector/binary_quantized.rs | 2 +- milli/src/update/settings.rs | 2 +- milli/src/vector/settings.rs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 0f3819586..10f731d75 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -225,7 +225,7 @@ async fn try_to_disable_binary_quantization() { } }, "error": { - "message": "`.embedders.todo.binaryQuantized`: Cannot disable the binary quantization", + "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization", 
"code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 40aa22a81..63db5237c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1007,7 +1007,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { let was_quantized = old.binary_quantized.set().unwrap_or_default(); - let settings_diff = SettingsDiff::from_settings(old, new)?; + let settings_diff = SettingsDiff::from_settings(&name, old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 9b2c1c6e3..3bb7f09e6 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -140,6 +140,7 @@ pub struct WriteBackToDocuments { impl SettingsDiff { pub fn from_settings( + embedder_name: &str, old: EmbeddingSettings, new: Setting, ) -> Result { @@ -181,7 +182,7 @@ impl SettingsDiff { && matches!(new_binary_quantize, Setting::Set(false)) { return Err(UserError::InvalidDisableBinaryQuantization { - embedder_name: String::from("todo"), + embedder_name: embedder_name.to_string(), }); } From e8d7c00d30367fc660cc471e03e56b6e65f2f7aa Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:16:30 +0200 Subject: [PATCH 45/60] add a test on the settings value --- meilisearch/src/routes/indexes/settings.rs | 9 ++- meilisearch/tests/vector/binary_quantized.rs | 84 ++++++++++++++++++-- 2 files changed, 85 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index ceea17668..aaf8673d0 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -643,12 +643,19 @@ fn embedder_analytics( .max() }); + let binary_quantization_used = 
setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }); + json!( { "total": setting.as_ref().map(|s| s.len()), "sources": sources, "document_template_used": document_template_used, - "document_template_max_bytes": document_template_max_bytes + "document_template_max_bytes": document_template_max_bytes, + "binary_quantization_used": binary_quantization_used, } ) } diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 10f731d75..469ec878b 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -4,6 +4,76 @@ use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; use crate::vector::generate_default_user_provided_documents; +#[actix_rt::test] +async fn retrieve_binary_quantize_status_in_the_settings() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + 
+ let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###); +} + #[actix_rt::test] async fn binary_quantize_before_sending_documents() { let server = Server::new().await; @@ -32,7 +102,7 @@ async fn binary_quantize_before_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, @@ -40,7 +110,7 @@ async fn binary_quantize_before_sending_documents() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // Make sure the documents DB has been cleared let (documents, _code) = index @@ -94,7 +164,7 @@ async fn binary_quantize_after_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, @@ -102,7 +172,7 @@ async fn binary_quantize_after_sending_documents() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + 
index.wait_task(value.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -116,7 +186,7 @@ async fn binary_quantize_after_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); // Make sure the documents are binary quantized let (documents, _code) = index @@ -193,7 +263,7 @@ async fn try_to_disable_binary_quantization() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -256,7 +326,7 @@ async fn binary_quantize_clear_documents() { server.wait_task(response.uid()).await.succeeded(); let (value, _code) = index.clear_all_documents().await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // Make sure the documents DB has been cleared let (documents, _code) = index From 633537ccd71811a1ee0cd6bd2c2d5b41b0f710a4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:41:55 +0200 Subject: [PATCH 46/60] fix updating documents without updating the settings --- meilisearch/tests/vector/binary_quantized.rs | 54 +++++++++++++------ milli/src/update/index_documents/mod.rs | 6 ++- .../src/update/index_documents/typed_chunk.rs | 5 +- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 469ec878b..0f3d01c2d 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -112,27 +112,49 @@ async fn binary_quantize_before_sending_documents() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await.succeeded(); - // Make sure the documents DB has been cleared + // Make sure the documents are binary quantized let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { 
retrieve_vectors: true, ..Default::default() }) .await; snapshot!(json_string!(documents), @r###" { - "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", - "code": "internal", - "type": "internal", - "link": "https://docs.meilisearch.com/errors#internal" - } - "###); - - // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; - snapshot!(documents, @r###" - { - "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", - "code": "internal", - "type": "internal", - "link": "https://docs.meilisearch.com/errors#internal" + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 } "###); } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index d8566582c..326dd842d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -705,7 +705,11 @@ where InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); - let was_quantized = embedder_config.map_or(false, |action| action.was_quantized); + let was_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 90e49d23b..97a4bf712 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ 
b/milli/src/update/index_documents/typed_chunk.rs @@ -668,9 +668,10 @@ pub(crate) fn write_typed_chunk_into_index( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; let binary_quantized = settings_diff - .embedding_config_updates + .old + .embedding_configs .get(&embedder_name) - .map_or(false, |conf| conf.was_quantized); + .map_or(false, |conf| conf.2); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) From 84f842233d815eac87692d33cadf5ea6ebeeaa8d Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:51:09 +0200 Subject: [PATCH 47/60] snapshots the embedder settings in the dump import with vector test --- dump/src/reader/mod.rs | 2 + ...__test__import_dump_v6_with_vectors-5.snap | 829 +-------- ...__test__import_dump_v6_with_vectors-6.snap | 1553 ++++++++--------- ...__test__import_dump_v6_with_vectors-7.snap | 1541 ++++++++-------- ...__test__import_dump_v6_with_vectors-8.snap | 1549 ++++++++-------- ...__test__import_dump_v6_with_vectors-9.snap | 780 +++++++++ 6 files changed, 3156 insertions(+), 3098 deletions(-) create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 3b96cbfb0..4f66ed8b3 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -255,6 +255,8 @@ pub(crate) mod test { } "###); + insta::assert_json_snapshot!(vector_index.settings().unwrap()); + { let documents: Result> = vector_index.documents().unwrap().collect(); let mut documents = documents.unwrap(); diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap index 43bdb9726..77694a629 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap 
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap @@ -1,783 +1,56 @@ --- source: dump/src/reader/mod.rs -expression: document +expression: vector_index.settings().unwrap() --- { - "id": "e3", - "desc": "overriden vector + map", - "_vectors": { - "default": [ - 0.2, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ], - "toto": [ - 0.1 - ] - } + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + 
"separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "proximityPrecision": "byWord", + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + }, + "embedders": { + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "revision": "617ca489d9e86b49b8167676d8220688b99db36e", + "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" + } + }, + "searchCutoffMs": null } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap index a9c76227a..43bdb9726 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap @@ -3,784 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e2", - "desc": "natural vector + map", + "id": "e3", + "desc": "overriden vector + map", "_vectors": { - "toto": [], - "default": { - "embeddings": [ - [ - -0.05189208313822746, - -0.9273212552070618, - 0.1443813145160675, - 0.0932632014155388, - 0.2665371894836426, - 0.36266782879829407, - 0.6402910947799683, - 0.32014018297195435, - 0.030915971845388412, - -0.9312191605567932, - -0.3718109726905823, - -0.2700554132461548, - -1.1014580726623535, - 0.9154956936836244, - -0.3406888246536255, - 1.0077725648880005, - 0.6577560901641846, - -0.3955195546150207, - -0.4148270785808563, - 0.1855088472366333, - 0.5062315464019775, - -0.3632686734199524, - -0.2277890294790268, - 0.2560805082321167, - -0.3853609561920166, - -0.1604762226343155, - -0.13947471976280212, - -0.20147813856601715, - -0.4466346800327301, 
- -0.3761846721172333, - 0.1443382054567337, - 0.18205296993255615, - 0.49359792470932007, - -0.22538000345230105, - -0.4996317625045776, - -0.22734887897968292, - -0.6034309267997742, - -0.7857939600944519, - -0.34923747181892395, - -0.3466345965862274, - 0.21176661550998688, - -0.5101462006568909, - -0.3403083384037018, - 0.000315118464641273, - 0.236465722322464, - -0.10246097296476364, - -1.3013339042663574, - 0.3419138789176941, - -0.32963496446609497, - -0.0901619717478752, - -0.5426247119903564, - 0.22656650841236117, - -0.44758284091949463, - 0.14151698350906372, - -0.1089438870549202, - 0.5500766634941101, - -0.670711100101471, - -0.6227269768714905, - 0.3894464075565338, - -0.27609574794769287, - 0.7028202414512634, - -0.19697771966457367, - 0.328511506319046, - 0.5063360929489136, - 0.4065195322036743, - 0.2614171802997589, - -0.30274391174316406, - 1.0393824577331543, - -0.7742937207221985, - -0.7874112129211426, - -0.6749666929244995, - 0.5190866589546204, - 0.004123548045754433, - -0.28312963247299194, - -0.038731709122657776, - -1.0142987966537476, - -0.09519586712121964, - 0.8755272626876831, - 0.4876938760280609, - 0.7811151742935181, - 0.85174959897995, - 0.11826585978269576, - 0.5373436808586121, - 0.3649002015590668, - 0.19064077734947205, - -0.00287026260048151, - -0.7305403351783752, - -0.015206154435873032, - -0.7899249196052551, - 0.19407285749912265, - 0.08596625179052353, - -0.28976231813430786, - -0.1525907665491104, - 0.3798313438892365, - 0.050306469202041626, - -0.5697937607765198, - 0.4219021201133728, - 0.276252806186676, - 0.1559903472661972, - 0.10030482709407806, - -0.4043720066547394, - -0.1969818025827408, - 0.5739826560020447, - 0.2116064727306366, - -1.4620544910430908, - -0.7802462577819824, - -0.24739810824394223, - -0.09791352599859238, - -0.4413802027702331, - 0.21549351513385773, - -0.9520436525344848, - -0.08762510865926743, - 0.08154498040676117, - -0.6154940724372864, - -1.01079523563385, - 0.885427713394165, - 
0.6967288851737976, - 0.27186504006385803, - -0.43194177746772766, - -0.11248451471328735, - 0.7576630711555481, - 0.4998855590820313, - 0.0264343973249197, - 0.9872855544090272, - 0.5634694695472717, - 0.053698331117630005, - 0.19410227239131927, - 0.3570743501186371, - -0.23670297861099243, - -0.9114483594894408, - 0.07884842902421951, - 0.7318344116210938, - 0.44630110263824463, - 0.08745364099740982, - -0.347101628780365, - -0.4314247667789459, - -0.5060274004936218, - 0.003706763498485088, - 0.44320008158683777, - -0.00788921769708395, - -0.1368623524904251, - -0.17391923069953918, - 0.14473655819892883, - 0.10927865654230118, - 0.6974599361419678, - 0.005052129738032818, - -0.016953065991401672, - -0.1256176233291626, - -0.036742497235536575, - 0.5591985583305359, - -0.37619709968566895, - 0.22429119050502777, - 0.5403043031692505, - -0.8603790998458862, - -0.3456307053565979, - 0.9292937517166138, - 0.5074859261512756, - 0.6310645937919617, - -0.3091641068458557, - 0.46902573108673096, - 0.7891915440559387, - 0.4499550759792328, - 0.2744995653629303, - 0.2712305784225464, - -0.04349074140191078, - -0.3638863265514374, - 0.7839881777763367, - 0.7352104783058167, - -0.19457511603832245, - -0.5957832932472229, - -0.43704694509506226, - -1.084769368171692, - 0.4904985725879669, - 0.5385226011276245, - 0.1891629993915558, - 0.12338479608297348, - 0.8315675258636475, - -0.07830192148685455, - 1.0916285514831543, - -0.28066861629486084, - -1.3585069179534912, - 0.5203898549079895, - 0.08678033947944641, - -0.2566044330596924, - 0.09484415501356123, - -0.0180208683013916, - 1.0264745950698853, - -0.023572135716676712, - 0.5864979028701782, - 0.7625196576118469, - -0.2543414533138275, - -0.8877770900726318, - 0.7611982822418213, - -0.06220436468720436, - 0.937336564064026, - 0.2704363465309143, - -0.37733694911003113, - 0.5076137781143188, - -0.30641937255859375, - 0.6252772808074951, - -0.0823579877614975, - -0.03736555948853493, - 0.4131673276424408, - 
-0.6514252424240112, - 0.12918265163898468, - -0.4483584463596344, - 0.6750786304473877, - -0.37008383870124817, - -0.02324833907186985, - 0.38027650117874146, - -0.26374951004981995, - 0.4346931278705597, - 0.42882832884788513, - -0.48798441886901855, - 1.1882442235946655, - 0.5132288336753845, - 0.5284568667411804, - -0.03538886830210686, - 0.29620853066444397, - -1.0683696269989014, - 0.25936177372932434, - 0.10404160618782043, - -0.25796034932136536, - 0.027896970510482788, - -0.09225251525640488, - 1.4811025857925415, - 0.641173779964447, - -0.13838383555412292, - -0.3437179923057556, - 0.5667019486427307, - -0.5400741696357727, - 0.31090837717056274, - 0.6470608115196228, - -0.3747067153453827, - -0.7364534735679626, - -0.07431528717279434, - 0.5173454880714417, - -0.6578747034072876, - 0.7107478976249695, - -0.7918999791145325, - -0.0648345872759819, - 0.609937846660614, - -0.7329513430595398, - 0.9741371870040894, - 0.17912346124649048, - -0.02658769302070141, - 0.5162150859832764, - -0.3978803157806397, - -0.7833885550498962, - -0.6497276425361633, - -0.3898126780986786, - -0.0952848568558693, - 0.2663288116455078, - -0.1604052186012268, - 0.373076468706131, - -0.8357769250869751, - -0.05217683315277099, - -0.2680160701274872, - 0.8389158248901367, - 0.6833611130714417, - -0.6712407469749451, - 0.7406917214393616, - -0.44522786140441895, - -0.34645363688468933, - -0.27384576201438904, - -0.9878405928611756, - -0.8166060447692871, - 0.06268279999494553, - 0.38567957282066345, - -0.3274703919887543, - 0.5296315550804138, - -0.11810623109340668, - 0.23029841482639313, - 0.08616159111261368, - -0.2195747196674347, - 0.09430307894945145, - 0.4057176411151886, - 0.4892159104347229, - -0.1636916548013687, - -0.6071445345878601, - 0.41256585717201233, - 0.622254490852356, - -0.41223976016044617, - -0.6686707139015198, - -0.7474371790885925, - -0.8509522080421448, - -0.16754287481307983, - -0.9078601002693176, - -0.29653599858283997, - -0.5020652413368225, - 
0.4692700505256653, - 0.01281109917908907, - -0.16071580350399017, - 0.03388889133930206, - -0.020511148497462273, - 0.5027827024459839, - -0.20729811489582065, - 0.48107290267944336, - 0.33669769763946533, - -0.5275911688804626, - 0.48271527886390686, - 0.2738940715789795, - -0.033152539283037186, - -0.13629786670207977, - -0.05965912342071533, - -0.26200807094573975, - 0.04002794995903969, - -0.34095603227615356, - -3.986898899078369, - -0.46819332242012024, - -0.422744482755661, - -0.169097900390625, - 0.6008929014205933, - 0.058016058057546616, - -0.11401277780532836, - -0.3077819049358368, - -0.09595538675785063, - 0.6723822355270386, - 0.19367831945419312, - 0.28304359316825867, - 0.1609862744808197, - 0.7567598819732666, - 0.6889985799789429, - 0.06907720118761063, - -0.04188092052936554, - -0.7434936165809631, - 0.13321782648563385, - 0.8456063270568848, - -0.10364038497209548, - -0.45084846019744873, - -0.4758241474628449, - 0.43882066011428833, - -0.6432598829269409, - 0.7217311859130859, - -0.24189773201942444, - 0.12737572193145752, - -1.1008601188659668, - -0.3305315673351288, - 0.14614742994308472, - -0.7819333076477051, - 0.5287120342254639, - -0.055538054555654526, - 0.1877404749393463, - -0.6907662153244019, - 0.5616975426673889, - -0.4611121714115143, - -0.26109233498573303, - -0.12898315489292145, - -0.3724522292613983, - -0.7191406488418579, - -0.4425233602523804, - -0.644108235836029, - 0.8424481153488159, - 0.17532426118850708, - -0.5121750235557556, - -0.6467239260673523, - -0.0008507720194756985, - 0.7866212129592896, - -0.02644744887948036, - -0.005045140627771616, - 0.015782782807946205, - 0.16334445774555206, - -0.1913367658853531, - -0.13697923719882965, - -0.6684983372688293, - 0.18346354365348816, - -0.341105580329895, - 0.5427411198616028, - 0.3779832422733307, - -0.6778115034103394, - -0.2931850254535675, - -0.8805161714553833, - -0.4212774932384491, - -0.5368952751159668, - -1.3937891721725464, - -1.225494146347046, - 
0.4276703894138336, - 1.1205668449401855, - -0.6005299687385559, - 0.15732505917549133, - -0.3914784789085388, - -1.357046604156494, - -0.4707142114639282, - -0.1497287154197693, - -0.25035548210144043, - -0.34328439831733704, - 0.39083412289619446, - 0.1623048633337021, - -0.9275814294815063, - -0.6430015563964844, - 0.2973862886428833, - 0.5580436587333679, - -0.6232585310935974, - -0.6611042022705078, - 0.4015969038009643, - -1.0232892036437988, - -0.2585645020008087, - -0.5431421399116516, - 0.5021264553070068, - -0.48601630330085754, - -0.010242084041237833, - 0.5862035155296326, - 0.7316920161247253, - 0.4036808013916016, - 0.4269520044326782, - -0.705938458442688, - 0.7747307419776917, - 0.10164368897676468, - 0.7887958884239197, - -0.9612497091293336, - 0.12755516171455383, - 0.06812842190265656, - -0.022603651508688927, - 0.14722754061222076, - -0.5588505268096924, - -0.20689940452575684, - 0.3557641804218292, - -0.6812759637832642, - 0.2860803008079529, - -0.38954633474349976, - 0.1759403496980667, - -0.5678874850273132, - -0.1692986786365509, - -0.14578519761562347, - 0.5711379051208496, - 1.0208125114440918, - 0.7759483456611633, - -0.372348427772522, - -0.5460885763168335, - 0.7190321683883667, - -0.6914990544319153, - 0.13365162909030914, - -0.4854792356491089, - 0.4054908752441406, - 0.4502798914909363, - -0.3041122555732727, - -0.06726965308189392, - -0.05570871382951737, - -0.0455719493329525, - 0.4785125255584717, - 0.8867972493171692, - 0.4107886850833893, - 0.6121342182159424, - -0.20477132499217987, - -0.5598517656326294, - -0.6443566679954529, - -0.5905212759971619, - -0.5571200251579285, - 0.17573799192905426, - -0.28621870279312134, - 0.1685224026441574, - 0.09719007462263109, - -0.04223639518022537, - -0.28623101115226746, - -0.1449810117483139, - -0.3789580464363098, - -0.5227636098861694, - -0.049728814512491226, - 0.7849089503288269, - 0.16792525351047516, - 0.9849340915679932, - -0.6559549570083618, - 0.35723909735679626, - 
-0.6822739243507385, - 1.2873116731643677, - 0.19993330538272855, - 0.03512010723352432, - -0.6972134113311768, - 0.18453484773635864, - -0.2437680810689926, - 0.2156416028738022, - 0.5230382680892944, - 0.22020135819911957, - 0.8314080238342285, - 0.15627102553844452, - -0.7330264449119568, - 0.3888184726238251, - -0.22034703195095065, - 0.5457669496536255, - -0.48084837198257446, - -0.45576658844947815, - -0.09287727624177931, - -0.06968110054731369, - 0.35125672817230225, - -0.4278119504451752, - 0.2038476765155792, - 0.11392722278833388, - 0.9433983564376832, - -0.4097744226455689, - 0.035297419875860214, - -0.4274404048919678, - -0.25100165605545044, - 1.0943366289138794, - -0.07634022831916809, - -0.2925529479980469, - -0.7512530088424683, - 0.2649727463722229, - -0.4078235328197479, - -0.3372223973274231, - 0.05190162733197212, - 0.005654910113662481, - -0.0001571219472680241, - -0.35445958375930786, - -0.7837416529655457, - 0.1500556766986847, - 0.4383024573326111, - 0.6099548935890198, - 0.05951934307813645, - -0.21325334906578064, - 0.0199207104742527, - -0.22704418003559113, - -0.6481077671051025, - 0.37442275881767273, - -1.015955924987793, - 0.38637226819992065, - -0.06489371508359909, - -0.494120329618454, - 0.3469836115837097, - 0.15402406454086304, - -0.7660972476005554, - -0.7053225040435791, - -0.25964751839637756, - 0.014004424214363098, - -0.2860170006752014, - -0.17565494775772095, - -0.45117494463920593, - -0.0031954257283359766, - 0.09676837921142578, - -0.514464259147644, - 0.41698193550109863, - -0.21642713248729703, - -0.5398141145706177, - -0.3647628426551819, - 0.37005379796028137, - 0.239425927400589, - -0.08833975344896317, - 0.934946596622467, - -0.48340797424316406, - 0.6241437792778015, - -0.7253676652908325, - -0.04303571209311485, - 1.1125205755233765, - -0.15692919492721558, - -0.2914651036262512, - -0.5117168426513672, - 0.21365483105182648, - 0.4924402534961701, - 0.5269662141799927, - 0.0352792888879776, - -0.149167999625206, 
- -0.6019760370254517, - 0.08245442807674408, - 0.4900692105293274, - 0.518824577331543, - -0.00005570516441366635, - -0.553304135799408, - 0.22217543423175812, - 0.5047767758369446, - 0.135724738240242, - 1.1511540412902832, - -0.3541218340396881, - -0.9712511897087096, - 0.8353699445724487, - -0.39227569103240967, - -0.9117669463157654, - -0.26349931955337524, - 0.05597023293375969, - 0.20695461332798004, - 0.3178807199001312, - 1.0663238763809204, - 0.5062212347984314, - 0.7288597822189331, - 0.09899299591779707, - 0.553720235824585, - 0.675009548664093, - -0.20067055523395536, - 0.3138423264026642, - -0.6886593103408813, - -0.2910398542881012, - -1.3186300992965698, - -0.4684459865093231, - -0.095743365585804, - -0.1257995069026947, - -0.4858281314373016, - -0.4935407340526581, - -0.3266896903514862, - -0.3928797245025635, - -0.40803104639053345, - -0.9975396394729614, - 0.4229583740234375, - 0.37309643626213074, - 0.4431034922599793, - 0.30364808440208435, - -0.3765178918838501, - 0.5616499185562134, - 0.16904796659946442, - -0.7343707084655762, - 0.2560209631919861, - 0.6166825294494629, - 0.3200829327106476, - -0.4483652710914612, - 0.16224201023578644, - -0.31495288014411926, - -0.42713335156440735, - 0.7270734906196594, - 0.7049484848976135, - -0.0571461021900177, - 0.04477125033736229, - -0.6647796034812927, - 1.183672308921814, - 0.36199676990509033, - 0.046881116926670074, - 0.4515796303749085, - 0.9278061985969543, - 0.31471705436706543, - -0.7073333859443665, - -0.3443860113620758, - 0.5440067052841187, - -0.15020819008350372, - -0.541202962398529, - 0.5203295946121216, - 1.2192286252975464, - -0.9983593225479126, - -0.18758884072303772, - 0.2758221924304962, - -0.6511523723602295, - -0.1584404855966568, - -0.236241415143013, - 0.2692437767982483, - -0.4941152036190033, - 0.4987454116344452, - -0.3331359028816223, - 0.3163745701313019, - 0.745529294013977, - -0.2905873656272888, - 0.13602906465530396, - 0.4679684340953827, - 1.0555986166000366, - 
1.075700044631958, - 0.5368486046791077, - -0.5118206739425659, - 0.8668332099914551, - -0.5726966857910156, - -0.7811751961708069, - 0.1938626915216446, - -0.1929349899291992, - 0.1757766306400299, - 0.6384295225143433, - 0.26462844014167786, - 0.9542630314826964, - 0.19313029944896695, - 1.264248013496399, - -0.6304428577423096, - 0.0487106591463089, - -0.16211535036563873, - -0.7894763350486755, - 0.3582514822483063, - -0.04153040423989296, - 0.635784387588501, - 0.6554391980171204, - -0.47010496258735657, - -0.8302040696144104, - -0.1350124627351761, - 0.2568812072277069, - 0.13614831864833832, - -0.2563649117946625, - -1.0434694290161133, - 0.3232482671737671, - 0.47882452607154846, - 0.4298652410507202, - 1.0563770532608032, - -0.28917592763900757, - -0.8533256649971008, - 0.10648339986801147, - 0.6376127004623413, - -0.20832888782024384, - 0.2370245456695557, - 0.0018312990432605147, - -0.2034837007522583, - 0.01051164511591196, - -1.105310082435608, - 0.29724350571632385, - 0.15604574978351593, - 0.1973688006401062, - 0.44394731521606445, - 0.3974513411521912, - -0.13625948131084442, - 0.9571986198425292, - 0.2257384955883026, - 0.2323588728904724, - -0.5583669543266296, - -0.7854922413825989, - 0.1647188365459442, - -1.6098142862319946, - 0.318587988615036, - -0.13399995863437653, - -0.2172701060771942, - -0.767514705657959, - -0.5813586711883545, - -0.3195130527019501, - -0.04894036799669266, - 0.2929930090904236, - -0.8213384747505188, - 0.07181350141763687, - 0.7469993829727173, - 0.6407455801963806, - 0.16365697979927063, - 0.7870153188705444, - 0.6524736881256104, - 0.6399973630905151, - -0.04992736503481865, - -0.03959266096353531, - -0.2512352466583252, - 0.8448855876922607, - -0.1422702670097351, - 0.1216789186000824, - -1.2647287845611572, - 0.5931149125099182, - 0.7186052203178406, - -0.06118432432413101, - -1.1942816972732544, - -0.17677085101604462, - 0.31543800234794617, - -0.32252824306488037, - 0.8255583047866821, - -0.14529970288276672, - 
-0.2695446312427521, - -0.33378756046295166, - -0.1653425395488739, - 0.1454019844532013, - -0.3920115828514099, - 0.912214994430542, - -0.7279734015464783, - 0.7374742031097412, - 0.933980405330658, - 0.13429680466651917, - -0.514870285987854, - 0.3989711999893189, - -0.11613689363002776, - 0.4022413492202759, - -0.9990655779838562, - -0.33749932050704956, - -0.4334589838981629, - -1.376373291015625, - -0.2993924915790558, - -0.09454808384180068, - -0.01314175222069025, - -0.001090060803107917, - 0.2137461006641388, - 0.2938512861728668, - 0.17508235573768616, - 0.8260607123374939, - -0.7218498587608337, - 0.2414487451314926, - -0.47296759486198425, - -0.3002610504627228, - -1.238540768623352, - 0.08663805574178696, - 0.6805586218833923, - 0.5909030437469482, - -0.42807504534721375, - -0.22887496650218964, - 0.47537800669670105, - -1.0474627017974854, - 0.6338009238243103, - 0.06548397243022919, - 0.4971011281013489, - 1.3484878540039063 - ] - ], - "regenerate": true - } + "default": [ + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "toto": [ + 0.1 + ] } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap index e5d28e450..a9c76227a 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap @@ -3,780 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e1", - "desc": "natural vector", + "id": "e2", + "desc": "natural vector + map", "_vectors": { + "toto": [], "default": { "embeddings": [ [ - -0.2979458272457123, - -0.5288640856742859, - -0.019957859069108963, - -0.18495318293571472, - 0.7429973483085632, - 0.5238497257232666, - 0.432366281747818, - 0.32744166254997253, - 0.0020762972999364138, - -0.9507834911346436, - -0.35097137093544006, - 0.08469701558351517, - -1.4176613092422483, - 0.4647577106952667, - -0.69340580701828, - 1.0372896194458008, - 0.3716741800308227, - 0.06031008064746857, - -0.6152024269104004, - 0.007914665155112743, - 0.7954924702644348, - -0.20773003995418549, - 0.09376765787601472, - 0.04508133605122566, - -0.2084471583366394, - -0.1518009901046753, - 0.018195509910583496, - -0.07044368237257004, - -0.18119366466999057, - -0.4480230510234833, - 0.3822529911994934, - 0.1911812424659729, - 0.4674372375011444, - 0.06963984668254852, - -0.09341949224472046, - 0.005675444379448891, - -0.6774799227714539, - -0.7066726684570313, - -0.39256376028060913, - 0.04005039855837822, - 0.2084812968969345, - -0.7872875928878784, - 
-0.8205880522727966, - 0.2919981777667999, - -0.06004738807678223, - -0.4907574355602264, - -1.5937862396240234, - 0.24249385297298431, - -0.14709846675395966, - -0.11860740929841997, - -0.8299489617347717, - 0.472964346408844, - -0.497518390417099, - -0.22205302119255063, - -0.4196169078350067, - 0.32697558403015137, - -0.360930860042572, - -0.9789686799049376, - 0.1887447088956833, - -0.403737336397171, - 0.18524253368377688, - 0.3768732249736786, - 0.3666233420372009, - 0.3511938452720642, - 0.6985810995101929, - 0.41721710562705994, - 0.09754953533411026, - 0.6204307079315186, - -1.0762996673583984, - -0.06263761967420578, - -0.7376511693000793, - 0.6849768161773682, - -0.1745152473449707, - -0.40449759364128113, - 0.20757411420345304, - -0.8424443006515503, - 0.330015629529953, - 0.3489064872264862, - 1.0954371690750122, - 0.8487558960914612, - 1.1076823472976685, - 0.61430823802948, - 0.4155903458595276, - 0.4111340939998626, - 0.05753209814429283, - -0.06429877132177353, - -0.765606164932251, - -0.41703930497169495, - -0.508820652961731, - 0.19859947264194489, - -0.16607828438282013, - -0.28112146258354187, - 0.11032675206661224, - 0.38809511065483093, - -0.36498191952705383, - -0.48671194911003113, - 0.6755134463310242, - 0.03958442434668541, - 0.4478721618652344, - -0.10335399955511092, - -0.9546685814857484, - -0.6087718605995178, - 0.17498846352100372, - 0.08320838958024979, - -1.4478336572647097, - -0.605027437210083, - -0.5867993235588074, - -0.14711688458919525, - -0.5447602272033691, - -0.026259321719408035, - -0.6997418403625488, - -0.07349082082509995, - 0.10638900846242905, - -0.7133527398109436, - -0.9396815299987792, - 1.087092399597168, - 1.1885089874267578, - 0.4011896848678589, - -0.4089202582836151, - -0.10938972979784012, - 0.6726722121238708, - 0.24576938152313232, - -0.24247920513153076, - 1.1499971151351929, - 0.47813335061073303, - -0.05331678315997124, - 0.32338133454322815, - 0.4870913326740265, - -0.23144258558750153, - 
-1.2023426294326782, - 0.2349330335855484, - 1.080536961555481, - 0.29334118962287903, - 0.391574501991272, - -0.15818795561790466, - -0.2948290705680847, - -0.024689948186278343, - 0.06602869182825089, - 0.5937030911445618, - -0.047901444137096405, - -0.512734591960907, - -0.35780075192451477, - 0.28751692175865173, - 0.4298716187477112, - 0.9242428541183472, - -0.17208744585514069, - 0.11515070497989656, - -0.0335976779460907, - -0.3422986567020416, - 0.5344581604003906, - 0.19895796477794647, - 0.33001241087913513, - 0.6390730142593384, - -0.6074934005737305, - -0.2553696632385254, - 0.9644920229911804, - 0.2699219584465027, - 0.6403993368148804, - -0.6380003690719604, - -0.027310986071825027, - 0.638815701007843, - 0.27719101309776306, - -0.13553589582443237, - 0.750195324420929, - 0.1224869191646576, - -0.20613941550254825, - 0.8444448709487915, - 0.16200250387191772, - -0.24750925600528717, - -0.739950954914093, - -0.28443849086761475, - -1.176282525062561, - 0.516107976436615, - 0.3774825632572174, - 0.10906043648719788, - 0.07962015271186829, - 0.7384604215621948, - -0.051241904497146606, - 1.1730090379714966, - -0.4828610122203827, - -1.404372215270996, - 0.8811132311820984, - -0.3839482367038727, - 0.022516896948218346, - -0.0491158664226532, - -0.43027013540267944, - 1.2049334049224854, - -0.27309560775756836, - 0.6883630752563477, - 0.8264574408531189, - -0.5020735263824463, - -0.4874092042446137, - 0.6007202863693237, - -0.4965405762195587, - 1.1302915811538696, - 0.032572727650403976, - -0.3731859028339386, - 0.658271849155426, - -0.9023059010505676, - 0.7400162220001221, - 0.014550759457051754, - -0.19699542224407196, - 0.2319706380367279, - -0.789058268070221, - -0.14905710518360138, - -0.5826214551925659, - 0.207652747631073, - -0.4507439732551574, - -0.3163885474205017, - 0.3604124188423157, - -0.45119962096214294, - 0.3428427278995514, - 0.3005594313144684, - -0.36026081442832947, - 1.1014249324798584, - 0.40884315967559814, - 
0.34991952776908875, - -0.1806638240814209, - 0.27440476417541504, - -0.7118373513221741, - 0.4645499587059021, - 0.214790478348732, - -0.2343102991580963, - 0.10500429570674896, - -0.28034430742263794, - 1.2267805337905884, - 1.0561333894729614, - -0.497364342212677, - -0.6143305897712708, - 0.24963727593421936, - -0.33136463165283203, - -0.01473914459347725, - 0.495918869972229, - -0.6985538005828857, - -1.0033197402954102, - 0.35937801003456116, - 0.6325868368148804, - -0.6808838844299316, - 1.0354058742523191, - -0.7214401960372925, - -0.33318862318992615, - 0.874398410320282, - -0.6594992280006409, - 0.6830640435218811, - -0.18534131348133087, - 0.024834271520376205, - 0.19901277124881744, - -0.5992477536201477, - -1.2126628160476685, - -0.9245557188987732, - -0.3898217976093292, - -0.1286519467830658, - 0.4217943847179413, - -0.1143646091222763, - 0.5630772709846497, - -0.5240639448165894, - 0.21152715384960177, - -0.3792001008987427, - 0.8266305327415466, - 1.170984387397766, - -0.8072142004966736, - 0.11382893472909927, - -0.17953898012638092, - -0.1789460331201553, - -0.15078622102737427, - -1.2082908153533936, - -0.7812382578849792, - -0.10903695970773696, - 0.7303897142410278, - -0.39054441452026367, - 0.19511254131793976, - -0.09121843427419662, - 0.22400228679180145, - 0.30143046379089355, - 0.1141919493675232, - 0.48112115263938904, - 0.7307931780815125, - 0.09701362252235413, - -0.2795647978782654, - -0.3997688889503479, - 0.5540812611579895, - 0.564578115940094, - -0.40065160393714905, - -0.3629159033298493, - -0.3789091110229492, - -0.7298538088798523, - -0.6996853351593018, - -0.4477842152118683, - -0.289089560508728, - -0.6430277824401855, - 0.2344944179058075, - 0.3742927014827728, - -0.5079357028007507, - 0.28841453790664673, - 0.06515737622976303, - 0.707315981388092, - 0.09498685598373412, - 0.8365515470504761, - 0.10002726316452026, - -0.7695478200912476, - 0.6264724135398865, - 0.7562043070793152, - -0.23112858831882477, - 
-0.2871039807796478, - -0.25010058283805847, - 0.2783474028110504, - -0.03224996477365494, - -0.9119359850883484, - -3.6940200328826904, - -0.5099936127662659, - -0.1604711413383484, - 0.17453284561634064, - 0.41759559512138367, - 0.1419190913438797, - -0.11362407356500626, - -0.33312007784843445, - 0.11511333286762238, - 0.4667884409427643, - -0.0031647447030991316, - 0.15879854559898376, - 0.3042248487472534, - 0.5404849052429199, - 0.8515422344207764, - 0.06286454200744629, - 0.43790125846862793, - -0.8682025074958801, - -0.06363756954669952, - 0.5547921657562256, - -0.01483887154608965, - -0.07361344993114471, - -0.929947018623352, - 0.3502565622329712, - -0.5080993175506592, - 1.0380364656448364, - -0.2017953395843506, - 0.21319580078125, - -1.0763001441955566, - -0.556368887424469, - 0.1949922740459442, - -0.6445739269256592, - 0.6791343688964844, - 0.21188358962535855, - 0.3736183941364288, - -0.21800459921360016, - 0.7597446441650391, - -0.3732394874095917, - -0.4710160195827484, - 0.025146087631583217, - 0.05341297015547752, - -0.9522109627723694, - -0.6000866889953613, - -0.08469046652317047, - 0.5966026186943054, - 0.3444081246852875, - -0.461188405752182, - -0.5279349088668823, - 0.10296865552663804, - 0.5175143480300903, - -0.20671147108078003, - 0.13392412662506104, - 0.4812754988670349, - 0.2993808686733246, - -0.3005635440349579, - 0.5141698122024536, - -0.6239235401153564, - 0.2877119481563568, - -0.4452739953994751, - 0.5621107816696167, - 0.5047508478164673, - -0.4226335883140564, - -0.18578553199768064, - -1.1967322826385498, - 0.28178197145462036, - -0.8692031502723694, - -1.1812998056411743, - -1.4526212215423584, - 0.4645712077617645, - 0.9327932000160216, - -0.6560136675834656, - 0.461549699306488, - -0.5621527433395386, - -1.328449010848999, - -0.08676894754171371, - 0.00021918353741057217, - -0.18864136934280396, - 0.1259666532278061, - 0.18240638077259064, - -0.14919660985469818, - -0.8965857625007629, - -0.7539900541305542, - 
0.013973715715110302, - 0.504276692867279, - -0.704748272895813, - -0.6428424119949341, - 0.6303996443748474, - -0.5404738187789917, - -0.31176653504371643, - -0.21262824535369873, - 0.18736739456653595, - -0.7998970746994019, - 0.039946746081113815, - 0.7390344738960266, - 0.4283199906349182, - 0.3795057237148285, - 0.07204607129096985, - -0.9230587482452391, - 0.9440426230430604, - 0.26272690296173096, - 0.5598306655883789, - -1.0520871877670288, - -0.2677186131477356, - -0.1888762265443802, - 0.30426350235939026, - 0.4746131896972656, - -0.5746733546257019, - -0.4197768568992615, - 0.8565112948417664, - -0.6767723560333252, - 0.23448683321475983, - -0.2010004222393036, - 0.4112907350063324, - -0.6497949957847595, - -0.418667733669281, - -0.4950824975967407, - 0.44438859820365906, - 1.026281714439392, - 0.482397586107254, - -0.26220494508743286, - -0.3640787005424499, - 0.5907743573188782, - -0.8771642446517944, - 0.09708411991596222, - -0.3671700060367584, - 0.4331349730491638, - 0.619417667388916, - -0.2684665620326996, - -0.5123821496963501, - -0.1502324342727661, - -0.012190685607492924, - 0.3580845892429352, - 0.8617186546325684, - 0.3493645489215851, - 1.0270192623138428, - 0.18297909200191495, - -0.5881339311599731, - -0.1733516901731491, - -0.5040576457977295, - -0.340370237827301, - -0.26767754554748535, - -0.28570041060447693, - -0.032928116619586945, - 0.6029254794120789, - 0.17397655546665192, - 0.09346921741962431, - 0.27815181016921997, - -0.46699589490890503, - -0.8148876428604126, - -0.3964351713657379, - 0.3812595009803772, - 0.13547226786613464, - 0.7126688361167908, - -0.3473474085330963, - -0.06573959439992905, - -0.6483767032623291, - 1.4808889627456665, - 0.30924928188323975, - -0.5085946917533875, - -0.8613000512123108, - 0.3048902451992035, - -0.4241599142551422, - 0.15909206867218018, - 0.5764641761779785, - -0.07879110425710678, - 1.015336513519287, - 0.07599356025457382, - -0.7025855779647827, - 0.30047643184661865, - 
-0.35094937682151794, - 0.2522146999835968, - -0.2338722199201584, - -0.8326804637908936, - -0.13695412874221802, - -0.03452421352267265, - 0.47974953055381775, - -0.18385636806488037, - 0.32438594102859497, - 0.1797013282775879, - 0.787494957447052, - -0.12579888105392456, - -0.07507286965847015, - -0.4389670491218567, - 0.2720070779323578, - 0.8138866424560547, - 0.01974171027541161, - -0.3057698905467987, - -0.6709924936294556, - 0.0885881632566452, - -0.2862754464149475, - 0.03475658595561981, - -0.1285519152879715, - 0.3838353455066681, - -0.2944154739379883, - -0.4204859137535095, - -0.4416137933731079, - 0.13426260650157928, - 0.36733248829841614, - 0.573428750038147, - -0.14928072690963745, - -0.026076916605234143, - 0.33286052942276, - -0.5340145826339722, - -0.17279052734375, - -0.01154550164937973, - -0.6620771884918213, - 0.18390542268753052, - -0.08265615254640579, - -0.2489682286977768, - 0.2429984211921692, - -0.044153645634651184, - -0.986578404903412, - -0.33574509620666504, - -0.5387663841247559, - 0.19767941534519196, - 0.12540718913078308, - -0.3403128981590271, - -0.4154576361179352, - 0.17275673151016235, - 0.09407442808151244, - -0.5414086580276489, - 0.4393929839134216, - 0.1725579798221588, - -0.4998118281364441, - -0.6926208138465881, - 0.16552448272705078, - 0.6659538149833679, - -0.10949844866991044, - 0.986426830291748, - 0.01748848147690296, - 0.4003709554672241, - -0.5430638194084167, - 0.35347291827201843, - 0.6887399554252625, - 0.08274628221988678, - 0.13407137989997864, - -0.591465950012207, - 0.3446292281150818, - 0.6069018244743347, - 0.1935492902994156, - -0.0989871397614479, - 0.07008486241102219, - -0.8503749370574951, - -0.09507356584072112, - 0.6259510517120361, - 0.13934025168418884, - 0.06392545253038406, - -0.4112265408039093, - -0.08475656062364578, - 0.4974113404750824, - -0.30606114864349365, - 1.111435890197754, - -0.018766529858112335, - -0.8422622680664063, - 0.4325508773326874, - -0.2832120656967163, - 
-0.4859798848628998, - -0.41498348116874695, - 0.015977520495653152, - 0.5292825698852539, - 0.4538311660289765, - 1.1328668594360352, - 0.22632671892642975, - 0.7918671369552612, - 0.33401933312416077, - 0.7306135296821594, - 0.3548600673675537, - 0.12506209313869476, - 0.8573207855224609, - -0.5818327069282532, - -0.6953738927841187, - -1.6171947717666626, - -0.1699674427509308, - 0.6318262815475464, - -0.05671752244234085, - -0.28145185112953186, - -0.3976689279079437, - -0.2041076272726059, - -0.5495951175689697, - -0.5152917504310608, - -0.9309796094894408, - 0.101932130753994, - 0.1367802917957306, - 0.1490798443555832, - 0.5304336547851563, - -0.5082434415817261, - 0.06688683480024338, - 0.14657628536224365, - -0.782435953617096, - 0.2962816655635834, - 0.6965363621711731, - 0.8496337532997131, - -0.3042965829372406, - 0.04343798756599426, - 0.0330701619386673, - -0.5662598013877869, - 1.1086925268173218, - 0.756072998046875, - -0.204134538769722, - 0.2404300570487976, - -0.47848284244537354, - 1.3659011125564575, - 0.5645433068275452, - -0.15836156904697418, - 0.43395575881004333, - 0.5944653749465942, - 1.0043466091156006, - -0.49446743726730347, - -0.5954391360282898, - 0.5341240763664246, - 0.020598189905285835, - -0.4036853015422821, - 0.4473709762096405, - 1.1998231410980225, - -0.9317775368690492, - -0.23321466147899628, - 0.2052552700042725, - -0.7423108816146851, - -0.19917210936546328, - -0.1722569614648819, - -0.034072667360305786, - -0.00671181408688426, - 0.46396249532699585, - -0.1372445821762085, - 0.053376372903585434, - 0.7392690777778625, - -0.38447609543800354, - 0.07497968524694443, - 0.5197252631187439, - 1.3746477365493774, - 0.9060075879096984, - 0.20000585913658145, - -0.4053704142570496, - 0.7497360110282898, - -0.34087055921554565, - -1.101803183555603, - 0.273650586605072, - -0.5125769376754761, - 0.22472351789474487, - 0.480757474899292, - -0.19845178723335263, - 0.8857700824737549, - 0.30752456188201904, - 1.1109285354614258, - 
-0.6768012642860413, - 0.524367094039917, - -0.22495046257972717, - -0.4224412739276886, - 0.40753406286239624, - -0.23133376240730288, - 0.3297771215438843, - 0.4905449151992798, - -0.6813114285469055, - -0.7543983459472656, - -0.5599071383476257, - 0.14351597428321838, - -0.029278717935085297, - -0.3970443606376648, - -0.303079217672348, - 0.24161772429943085, - 0.008353390730917454, - -0.0062365154735744, - 1.0824860334396362, - -0.3704061508178711, - -1.0337258577346802, - 0.04638749733567238, - 1.163011074066162, - -0.31737643480300903, - 0.013986887410283089, - 0.19223114848136905, - -0.2260770797729492, - -0.210910826921463, - -1.0191949605941772, - 0.22356095910072327, - 0.09353553503751756, - 0.18096882104873657, - 0.14867214858531952, - 0.43408671021461487, - -0.33312076330184937, - 0.8173948526382446, - 0.6428242921829224, - 0.20215003192424777, - -0.6634518504142761, - -0.4132290482521057, - 0.29815030097961426, - -1.579406976699829, - -0.0981958732008934, - -0.03941014781594277, - 0.1709178239107132, - -0.5481140613555908, - -0.5338194966316223, - -0.3528362512588501, - -0.11561278253793716, - -0.21793591976165771, - -1.1570470333099363, - 0.2157980799674988, - 0.42083489894866943, - 0.9639263153076172, - 0.09747201204299928, - 0.15671424567699432, - 0.4034591615200043, - 0.6728067994117737, - -0.5216875672340393, - 0.09657668322324751, - -0.2416689097881317, - 0.747975766658783, - 0.1021689772605896, - 0.11652665585279463, - -1.0484966039657593, - 0.8489304780960083, - 0.7169828414916992, - -0.09012343734502792, - -1.3173753023147583, - 0.057890523225069046, - -0.006231260951608419, - -0.1018214002251625, - 0.936040461063385, - -0.0502331368625164, - -0.4284322261810303, - -0.38209280371665955, - -0.22668412327766416, - 0.0782942995429039, - -0.4881664514541626, - 0.9268959760665894, - 0.001867273123934865, - 0.42261114716529846, - 0.8283362984657288, - 0.4256294071674347, - -0.7965338826179504, - 0.4840078353881836, - -0.19861412048339844, - 
0.33977967500686646, - -0.4604192078113556, - -0.3107339143753052, - -0.2839638590812683, - -1.5734281539916992, - 0.005220232997089624, - 0.09239906817674635, - -0.7828494906425476, - -0.1397123783826828, - 0.2576255202293396, - 0.21372435986995697, - -0.23169949650764465, - 0.4016408920288086, - -0.462497353553772, - -0.2186472862958908, - -0.5617868900299072, - -0.3649831712245941, - -1.1585862636566162, - -0.08222806453704834, - 0.931126832962036, - 0.4327389597892761, - -0.46451422572135925, - -0.5430706143379211, - -0.27434298396110535, - -0.9479129314422609, - 0.1845661848783493, - 0.3972720205783844, - 0.4883299469947815, - 1.04031240940094 + -0.05189208313822746, + -0.9273212552070618, + 0.1443813145160675, + 0.0932632014155388, + 0.2665371894836426, + 0.36266782879829407, + 0.6402910947799683, + 0.32014018297195435, + 0.030915971845388412, + -0.9312191605567932, + -0.3718109726905823, + -0.2700554132461548, + -1.1014580726623535, + 0.9154956936836244, + -0.3406888246536255, + 1.0077725648880005, + 0.6577560901641846, + -0.3955195546150207, + -0.4148270785808563, + 0.1855088472366333, + 0.5062315464019775, + -0.3632686734199524, + -0.2277890294790268, + 0.2560805082321167, + -0.3853609561920166, + -0.1604762226343155, + -0.13947471976280212, + -0.20147813856601715, + -0.4466346800327301, + -0.3761846721172333, + 0.1443382054567337, + 0.18205296993255615, + 0.49359792470932007, + -0.22538000345230105, + -0.4996317625045776, + -0.22734887897968292, + -0.6034309267997742, + -0.7857939600944519, + -0.34923747181892395, + -0.3466345965862274, + 0.21176661550998688, + -0.5101462006568909, + -0.3403083384037018, + 0.000315118464641273, + 0.236465722322464, + -0.10246097296476364, + -1.3013339042663574, + 0.3419138789176941, + -0.32963496446609497, + -0.0901619717478752, + -0.5426247119903564, + 0.22656650841236117, + -0.44758284091949463, + 0.14151698350906372, + -0.1089438870549202, + 0.5500766634941101, + -0.670711100101471, + -0.6227269768714905, + 
0.3894464075565338, + -0.27609574794769287, + 0.7028202414512634, + -0.19697771966457367, + 0.328511506319046, + 0.5063360929489136, + 0.4065195322036743, + 0.2614171802997589, + -0.30274391174316406, + 1.0393824577331543, + -0.7742937207221985, + -0.7874112129211426, + -0.6749666929244995, + 0.5190866589546204, + 0.004123548045754433, + -0.28312963247299194, + -0.038731709122657776, + -1.0142987966537476, + -0.09519586712121964, + 0.8755272626876831, + 0.4876938760280609, + 0.7811151742935181, + 0.85174959897995, + 0.11826585978269576, + 0.5373436808586121, + 0.3649002015590668, + 0.19064077734947205, + -0.00287026260048151, + -0.7305403351783752, + -0.015206154435873032, + -0.7899249196052551, + 0.19407285749912265, + 0.08596625179052353, + -0.28976231813430786, + -0.1525907665491104, + 0.3798313438892365, + 0.050306469202041626, + -0.5697937607765198, + 0.4219021201133728, + 0.276252806186676, + 0.1559903472661972, + 0.10030482709407806, + -0.4043720066547394, + -0.1969818025827408, + 0.5739826560020447, + 0.2116064727306366, + -1.4620544910430908, + -0.7802462577819824, + -0.24739810824394223, + -0.09791352599859238, + -0.4413802027702331, + 0.21549351513385773, + -0.9520436525344848, + -0.08762510865926743, + 0.08154498040676117, + -0.6154940724372864, + -1.01079523563385, + 0.885427713394165, + 0.6967288851737976, + 0.27186504006385803, + -0.43194177746772766, + -0.11248451471328735, + 0.7576630711555481, + 0.4998855590820313, + 0.0264343973249197, + 0.9872855544090272, + 0.5634694695472717, + 0.053698331117630005, + 0.19410227239131927, + 0.3570743501186371, + -0.23670297861099243, + -0.9114483594894408, + 0.07884842902421951, + 0.7318344116210938, + 0.44630110263824463, + 0.08745364099740982, + -0.347101628780365, + -0.4314247667789459, + -0.5060274004936218, + 0.003706763498485088, + 0.44320008158683777, + -0.00788921769708395, + -0.1368623524904251, + -0.17391923069953918, + 0.14473655819892883, + 0.10927865654230118, + 0.6974599361419678, + 
0.005052129738032818, + -0.016953065991401672, + -0.1256176233291626, + -0.036742497235536575, + 0.5591985583305359, + -0.37619709968566895, + 0.22429119050502777, + 0.5403043031692505, + -0.8603790998458862, + -0.3456307053565979, + 0.9292937517166138, + 0.5074859261512756, + 0.6310645937919617, + -0.3091641068458557, + 0.46902573108673096, + 0.7891915440559387, + 0.4499550759792328, + 0.2744995653629303, + 0.2712305784225464, + -0.04349074140191078, + -0.3638863265514374, + 0.7839881777763367, + 0.7352104783058167, + -0.19457511603832245, + -0.5957832932472229, + -0.43704694509506226, + -1.084769368171692, + 0.4904985725879669, + 0.5385226011276245, + 0.1891629993915558, + 0.12338479608297348, + 0.8315675258636475, + -0.07830192148685455, + 1.0916285514831543, + -0.28066861629486084, + -1.3585069179534912, + 0.5203898549079895, + 0.08678033947944641, + -0.2566044330596924, + 0.09484415501356123, + -0.0180208683013916, + 1.0264745950698853, + -0.023572135716676712, + 0.5864979028701782, + 0.7625196576118469, + -0.2543414533138275, + -0.8877770900726318, + 0.7611982822418213, + -0.06220436468720436, + 0.937336564064026, + 0.2704363465309143, + -0.37733694911003113, + 0.5076137781143188, + -0.30641937255859375, + 0.6252772808074951, + -0.0823579877614975, + -0.03736555948853493, + 0.4131673276424408, + -0.6514252424240112, + 0.12918265163898468, + -0.4483584463596344, + 0.6750786304473877, + -0.37008383870124817, + -0.02324833907186985, + 0.38027650117874146, + -0.26374951004981995, + 0.4346931278705597, + 0.42882832884788513, + -0.48798441886901855, + 1.1882442235946655, + 0.5132288336753845, + 0.5284568667411804, + -0.03538886830210686, + 0.29620853066444397, + -1.0683696269989014, + 0.25936177372932434, + 0.10404160618782043, + -0.25796034932136536, + 0.027896970510482788, + -0.09225251525640488, + 1.4811025857925415, + 0.641173779964447, + -0.13838383555412292, + -0.3437179923057556, + 0.5667019486427307, + -0.5400741696357727, + 0.31090837717056274, + 
0.6470608115196228, + -0.3747067153453827, + -0.7364534735679626, + -0.07431528717279434, + 0.5173454880714417, + -0.6578747034072876, + 0.7107478976249695, + -0.7918999791145325, + -0.0648345872759819, + 0.609937846660614, + -0.7329513430595398, + 0.9741371870040894, + 0.17912346124649048, + -0.02658769302070141, + 0.5162150859832764, + -0.3978803157806397, + -0.7833885550498962, + -0.6497276425361633, + -0.3898126780986786, + -0.0952848568558693, + 0.2663288116455078, + -0.1604052186012268, + 0.373076468706131, + -0.8357769250869751, + -0.05217683315277099, + -0.2680160701274872, + 0.8389158248901367, + 0.6833611130714417, + -0.6712407469749451, + 0.7406917214393616, + -0.44522786140441895, + -0.34645363688468933, + -0.27384576201438904, + -0.9878405928611756, + -0.8166060447692871, + 0.06268279999494553, + 0.38567957282066345, + -0.3274703919887543, + 0.5296315550804138, + -0.11810623109340668, + 0.23029841482639313, + 0.08616159111261368, + -0.2195747196674347, + 0.09430307894945145, + 0.4057176411151886, + 0.4892159104347229, + -0.1636916548013687, + -0.6071445345878601, + 0.41256585717201233, + 0.622254490852356, + -0.41223976016044617, + -0.6686707139015198, + -0.7474371790885925, + -0.8509522080421448, + -0.16754287481307983, + -0.9078601002693176, + -0.29653599858283997, + -0.5020652413368225, + 0.4692700505256653, + 0.01281109917908907, + -0.16071580350399017, + 0.03388889133930206, + -0.020511148497462273, + 0.5027827024459839, + -0.20729811489582065, + 0.48107290267944336, + 0.33669769763946533, + -0.5275911688804626, + 0.48271527886390686, + 0.2738940715789795, + -0.033152539283037186, + -0.13629786670207977, + -0.05965912342071533, + -0.26200807094573975, + 0.04002794995903969, + -0.34095603227615356, + -3.986898899078369, + -0.46819332242012024, + -0.422744482755661, + -0.169097900390625, + 0.6008929014205933, + 0.058016058057546616, + -0.11401277780532836, + -0.3077819049358368, + -0.09595538675785063, + 0.6723822355270386, + 0.19367831945419312, + 
0.28304359316825867, + 0.1609862744808197, + 0.7567598819732666, + 0.6889985799789429, + 0.06907720118761063, + -0.04188092052936554, + -0.7434936165809631, + 0.13321782648563385, + 0.8456063270568848, + -0.10364038497209548, + -0.45084846019744873, + -0.4758241474628449, + 0.43882066011428833, + -0.6432598829269409, + 0.7217311859130859, + -0.24189773201942444, + 0.12737572193145752, + -1.1008601188659668, + -0.3305315673351288, + 0.14614742994308472, + -0.7819333076477051, + 0.5287120342254639, + -0.055538054555654526, + 0.1877404749393463, + -0.6907662153244019, + 0.5616975426673889, + -0.4611121714115143, + -0.26109233498573303, + -0.12898315489292145, + -0.3724522292613983, + -0.7191406488418579, + -0.4425233602523804, + -0.644108235836029, + 0.8424481153488159, + 0.17532426118850708, + -0.5121750235557556, + -0.6467239260673523, + -0.0008507720194756985, + 0.7866212129592896, + -0.02644744887948036, + -0.005045140627771616, + 0.015782782807946205, + 0.16334445774555206, + -0.1913367658853531, + -0.13697923719882965, + -0.6684983372688293, + 0.18346354365348816, + -0.341105580329895, + 0.5427411198616028, + 0.3779832422733307, + -0.6778115034103394, + -0.2931850254535675, + -0.8805161714553833, + -0.4212774932384491, + -0.5368952751159668, + -1.3937891721725464, + -1.225494146347046, + 0.4276703894138336, + 1.1205668449401855, + -0.6005299687385559, + 0.15732505917549133, + -0.3914784789085388, + -1.357046604156494, + -0.4707142114639282, + -0.1497287154197693, + -0.25035548210144043, + -0.34328439831733704, + 0.39083412289619446, + 0.1623048633337021, + -0.9275814294815063, + -0.6430015563964844, + 0.2973862886428833, + 0.5580436587333679, + -0.6232585310935974, + -0.6611042022705078, + 0.4015969038009643, + -1.0232892036437988, + -0.2585645020008087, + -0.5431421399116516, + 0.5021264553070068, + -0.48601630330085754, + -0.010242084041237833, + 0.5862035155296326, + 0.7316920161247253, + 0.4036808013916016, + 0.4269520044326782, + -0.705938458442688, + 
0.7747307419776917, + 0.10164368897676468, + 0.7887958884239197, + -0.9612497091293336, + 0.12755516171455383, + 0.06812842190265656, + -0.022603651508688927, + 0.14722754061222076, + -0.5588505268096924, + -0.20689940452575684, + 0.3557641804218292, + -0.6812759637832642, + 0.2860803008079529, + -0.38954633474349976, + 0.1759403496980667, + -0.5678874850273132, + -0.1692986786365509, + -0.14578519761562347, + 0.5711379051208496, + 1.0208125114440918, + 0.7759483456611633, + -0.372348427772522, + -0.5460885763168335, + 0.7190321683883667, + -0.6914990544319153, + 0.13365162909030914, + -0.4854792356491089, + 0.4054908752441406, + 0.4502798914909363, + -0.3041122555732727, + -0.06726965308189392, + -0.05570871382951737, + -0.0455719493329525, + 0.4785125255584717, + 0.8867972493171692, + 0.4107886850833893, + 0.6121342182159424, + -0.20477132499217987, + -0.5598517656326294, + -0.6443566679954529, + -0.5905212759971619, + -0.5571200251579285, + 0.17573799192905426, + -0.28621870279312134, + 0.1685224026441574, + 0.09719007462263109, + -0.04223639518022537, + -0.28623101115226746, + -0.1449810117483139, + -0.3789580464363098, + -0.5227636098861694, + -0.049728814512491226, + 0.7849089503288269, + 0.16792525351047516, + 0.9849340915679932, + -0.6559549570083618, + 0.35723909735679626, + -0.6822739243507385, + 1.2873116731643677, + 0.19993330538272855, + 0.03512010723352432, + -0.6972134113311768, + 0.18453484773635864, + -0.2437680810689926, + 0.2156416028738022, + 0.5230382680892944, + 0.22020135819911957, + 0.8314080238342285, + 0.15627102553844452, + -0.7330264449119568, + 0.3888184726238251, + -0.22034703195095065, + 0.5457669496536255, + -0.48084837198257446, + -0.45576658844947815, + -0.09287727624177931, + -0.06968110054731369, + 0.35125672817230225, + -0.4278119504451752, + 0.2038476765155792, + 0.11392722278833388, + 0.9433983564376832, + -0.4097744226455689, + 0.035297419875860214, + -0.4274404048919678, + -0.25100165605545044, + 1.0943366289138794, + 
-0.07634022831916809, + -0.2925529479980469, + -0.7512530088424683, + 0.2649727463722229, + -0.4078235328197479, + -0.3372223973274231, + 0.05190162733197212, + 0.005654910113662481, + -0.0001571219472680241, + -0.35445958375930786, + -0.7837416529655457, + 0.1500556766986847, + 0.4383024573326111, + 0.6099548935890198, + 0.05951934307813645, + -0.21325334906578064, + 0.0199207104742527, + -0.22704418003559113, + -0.6481077671051025, + 0.37442275881767273, + -1.015955924987793, + 0.38637226819992065, + -0.06489371508359909, + -0.494120329618454, + 0.3469836115837097, + 0.15402406454086304, + -0.7660972476005554, + -0.7053225040435791, + -0.25964751839637756, + 0.014004424214363098, + -0.2860170006752014, + -0.17565494775772095, + -0.45117494463920593, + -0.0031954257283359766, + 0.09676837921142578, + -0.514464259147644, + 0.41698193550109863, + -0.21642713248729703, + -0.5398141145706177, + -0.3647628426551819, + 0.37005379796028137, + 0.239425927400589, + -0.08833975344896317, + 0.934946596622467, + -0.48340797424316406, + 0.6241437792778015, + -0.7253676652908325, + -0.04303571209311485, + 1.1125205755233765, + -0.15692919492721558, + -0.2914651036262512, + -0.5117168426513672, + 0.21365483105182648, + 0.4924402534961701, + 0.5269662141799927, + 0.0352792888879776, + -0.149167999625206, + -0.6019760370254517, + 0.08245442807674408, + 0.4900692105293274, + 0.518824577331543, + -0.00005570516441366635, + -0.553304135799408, + 0.22217543423175812, + 0.5047767758369446, + 0.135724738240242, + 1.1511540412902832, + -0.3541218340396881, + -0.9712511897087096, + 0.8353699445724487, + -0.39227569103240967, + -0.9117669463157654, + -0.26349931955337524, + 0.05597023293375969, + 0.20695461332798004, + 0.3178807199001312, + 1.0663238763809204, + 0.5062212347984314, + 0.7288597822189331, + 0.09899299591779707, + 0.553720235824585, + 0.675009548664093, + -0.20067055523395536, + 0.3138423264026642, + -0.6886593103408813, + -0.2910398542881012, + -1.3186300992965698, + 
-0.4684459865093231, + -0.095743365585804, + -0.1257995069026947, + -0.4858281314373016, + -0.4935407340526581, + -0.3266896903514862, + -0.3928797245025635, + -0.40803104639053345, + -0.9975396394729614, + 0.4229583740234375, + 0.37309643626213074, + 0.4431034922599793, + 0.30364808440208435, + -0.3765178918838501, + 0.5616499185562134, + 0.16904796659946442, + -0.7343707084655762, + 0.2560209631919861, + 0.6166825294494629, + 0.3200829327106476, + -0.4483652710914612, + 0.16224201023578644, + -0.31495288014411926, + -0.42713335156440735, + 0.7270734906196594, + 0.7049484848976135, + -0.0571461021900177, + 0.04477125033736229, + -0.6647796034812927, + 1.183672308921814, + 0.36199676990509033, + 0.046881116926670074, + 0.4515796303749085, + 0.9278061985969543, + 0.31471705436706543, + -0.7073333859443665, + -0.3443860113620758, + 0.5440067052841187, + -0.15020819008350372, + -0.541202962398529, + 0.5203295946121216, + 1.2192286252975464, + -0.9983593225479126, + -0.18758884072303772, + 0.2758221924304962, + -0.6511523723602295, + -0.1584404855966568, + -0.236241415143013, + 0.2692437767982483, + -0.4941152036190033, + 0.4987454116344452, + -0.3331359028816223, + 0.3163745701313019, + 0.745529294013977, + -0.2905873656272888, + 0.13602906465530396, + 0.4679684340953827, + 1.0555986166000366, + 1.075700044631958, + 0.5368486046791077, + -0.5118206739425659, + 0.8668332099914551, + -0.5726966857910156, + -0.7811751961708069, + 0.1938626915216446, + -0.1929349899291992, + 0.1757766306400299, + 0.6384295225143433, + 0.26462844014167786, + 0.9542630314826964, + 0.19313029944896695, + 1.264248013496399, + -0.6304428577423096, + 0.0487106591463089, + -0.16211535036563873, + -0.7894763350486755, + 0.3582514822483063, + -0.04153040423989296, + 0.635784387588501, + 0.6554391980171204, + -0.47010496258735657, + -0.8302040696144104, + -0.1350124627351761, + 0.2568812072277069, + 0.13614831864833832, + -0.2563649117946625, + -1.0434694290161133, + 0.3232482671737671, + 
0.47882452607154846, + 0.4298652410507202, + 1.0563770532608032, + -0.28917592763900757, + -0.8533256649971008, + 0.10648339986801147, + 0.6376127004623413, + -0.20832888782024384, + 0.2370245456695557, + 0.0018312990432605147, + -0.2034837007522583, + 0.01051164511591196, + -1.105310082435608, + 0.29724350571632385, + 0.15604574978351593, + 0.1973688006401062, + 0.44394731521606445, + 0.3974513411521912, + -0.13625948131084442, + 0.9571986198425292, + 0.2257384955883026, + 0.2323588728904724, + -0.5583669543266296, + -0.7854922413825989, + 0.1647188365459442, + -1.6098142862319946, + 0.318587988615036, + -0.13399995863437653, + -0.2172701060771942, + -0.767514705657959, + -0.5813586711883545, + -0.3195130527019501, + -0.04894036799669266, + 0.2929930090904236, + -0.8213384747505188, + 0.07181350141763687, + 0.7469993829727173, + 0.6407455801963806, + 0.16365697979927063, + 0.7870153188705444, + 0.6524736881256104, + 0.6399973630905151, + -0.04992736503481865, + -0.03959266096353531, + -0.2512352466583252, + 0.8448855876922607, + -0.1422702670097351, + 0.1216789186000824, + -1.2647287845611572, + 0.5931149125099182, + 0.7186052203178406, + -0.06118432432413101, + -1.1942816972732544, + -0.17677085101604462, + 0.31543800234794617, + -0.32252824306488037, + 0.8255583047866821, + -0.14529970288276672, + -0.2695446312427521, + -0.33378756046295166, + -0.1653425395488739, + 0.1454019844532013, + -0.3920115828514099, + 0.912214994430542, + -0.7279734015464783, + 0.7374742031097412, + 0.933980405330658, + 0.13429680466651917, + -0.514870285987854, + 0.3989711999893189, + -0.11613689363002776, + 0.4022413492202759, + -0.9990655779838562, + -0.33749932050704956, + -0.4334589838981629, + -1.376373291015625, + -0.2993924915790558, + -0.09454808384180068, + -0.01314175222069025, + -0.001090060803107917, + 0.2137461006641388, + 0.2938512861728668, + 0.17508235573768616, + 0.8260607123374939, + -0.7218498587608337, + 0.2414487451314926, + -0.47296759486198425, + 
-0.3002610504627228, + -1.238540768623352, + 0.08663805574178696, + 0.6805586218833923, + 0.5909030437469482, + -0.42807504534721375, + -0.22887496650218964, + 0.47537800669670105, + -1.0474627017974854, + 0.6338009238243103, + 0.06548397243022919, + 0.4971011281013489, + 1.3484878540039063 ] ], "regenerate": true diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap index 4bd0e2c3e..e5d28e450 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap @@ -3,778 +3,783 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e0", - "desc": "overriden vector", + "id": "e1", + "desc": "natural vector", "_vectors": { - "default": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ] + "default": { + "embeddings": [ + [ + -0.2979458272457123, + -0.5288640856742859, + -0.019957859069108963, + -0.18495318293571472, + 0.7429973483085632, + 0.5238497257232666, + 0.432366281747818, + 0.32744166254997253, + 0.0020762972999364138, + -0.9507834911346436, + -0.35097137093544006, + 0.08469701558351517, + -1.4176613092422483, + 0.4647577106952667, + -0.69340580701828, + 1.0372896194458008, + 0.3716741800308227, + 0.06031008064746857, + -0.6152024269104004, + 0.007914665155112743, + 0.7954924702644348, + -0.20773003995418549, + 0.09376765787601472, + 0.04508133605122566, + -0.2084471583366394, + -0.1518009901046753, + 0.018195509910583496, + -0.07044368237257004, + -0.18119366466999057, + -0.4480230510234833, + 0.3822529911994934, + 0.1911812424659729, + 0.4674372375011444, + 0.06963984668254852, + -0.09341949224472046, + 0.005675444379448891, + -0.6774799227714539, + -0.7066726684570313, + -0.39256376028060913, + 0.04005039855837822, + 0.2084812968969345, + -0.7872875928878784, + -0.8205880522727966, + 0.2919981777667999, + -0.06004738807678223, + -0.4907574355602264, + -1.5937862396240234, + 0.24249385297298431, + -0.14709846675395966, + -0.11860740929841997, + -0.8299489617347717, + 0.472964346408844, + -0.497518390417099, + -0.22205302119255063, + -0.4196169078350067, + 0.32697558403015137, + -0.360930860042572, + -0.9789686799049376, + 0.1887447088956833, + -0.403737336397171, + 0.18524253368377688, + 0.3768732249736786, + 0.3666233420372009, + 0.3511938452720642, + 0.6985810995101929, + 0.41721710562705994, + 0.09754953533411026, + 0.6204307079315186, + -1.0762996673583984, + -0.06263761967420578, + -0.7376511693000793, + 0.6849768161773682, + -0.1745152473449707, + 
-0.40449759364128113, + 0.20757411420345304, + -0.8424443006515503, + 0.330015629529953, + 0.3489064872264862, + 1.0954371690750122, + 0.8487558960914612, + 1.1076823472976685, + 0.61430823802948, + 0.4155903458595276, + 0.4111340939998626, + 0.05753209814429283, + -0.06429877132177353, + -0.765606164932251, + -0.41703930497169495, + -0.508820652961731, + 0.19859947264194489, + -0.16607828438282013, + -0.28112146258354187, + 0.11032675206661224, + 0.38809511065483093, + -0.36498191952705383, + -0.48671194911003113, + 0.6755134463310242, + 0.03958442434668541, + 0.4478721618652344, + -0.10335399955511092, + -0.9546685814857484, + -0.6087718605995178, + 0.17498846352100372, + 0.08320838958024979, + -1.4478336572647097, + -0.605027437210083, + -0.5867993235588074, + -0.14711688458919525, + -0.5447602272033691, + -0.026259321719408035, + -0.6997418403625488, + -0.07349082082509995, + 0.10638900846242905, + -0.7133527398109436, + -0.9396815299987792, + 1.087092399597168, + 1.1885089874267578, + 0.4011896848678589, + -0.4089202582836151, + -0.10938972979784012, + 0.6726722121238708, + 0.24576938152313232, + -0.24247920513153076, + 1.1499971151351929, + 0.47813335061073303, + -0.05331678315997124, + 0.32338133454322815, + 0.4870913326740265, + -0.23144258558750153, + -1.2023426294326782, + 0.2349330335855484, + 1.080536961555481, + 0.29334118962287903, + 0.391574501991272, + -0.15818795561790466, + -0.2948290705680847, + -0.024689948186278343, + 0.06602869182825089, + 0.5937030911445618, + -0.047901444137096405, + -0.512734591960907, + -0.35780075192451477, + 0.28751692175865173, + 0.4298716187477112, + 0.9242428541183472, + -0.17208744585514069, + 0.11515070497989656, + -0.0335976779460907, + -0.3422986567020416, + 0.5344581604003906, + 0.19895796477794647, + 0.33001241087913513, + 0.6390730142593384, + -0.6074934005737305, + -0.2553696632385254, + 0.9644920229911804, + 0.2699219584465027, + 0.6403993368148804, + -0.6380003690719604, + -0.027310986071825027, + 
0.638815701007843, + 0.27719101309776306, + -0.13553589582443237, + 0.750195324420929, + 0.1224869191646576, + -0.20613941550254825, + 0.8444448709487915, + 0.16200250387191772, + -0.24750925600528717, + -0.739950954914093, + -0.28443849086761475, + -1.176282525062561, + 0.516107976436615, + 0.3774825632572174, + 0.10906043648719788, + 0.07962015271186829, + 0.7384604215621948, + -0.051241904497146606, + 1.1730090379714966, + -0.4828610122203827, + -1.404372215270996, + 0.8811132311820984, + -0.3839482367038727, + 0.022516896948218346, + -0.0491158664226532, + -0.43027013540267944, + 1.2049334049224854, + -0.27309560775756836, + 0.6883630752563477, + 0.8264574408531189, + -0.5020735263824463, + -0.4874092042446137, + 0.6007202863693237, + -0.4965405762195587, + 1.1302915811538696, + 0.032572727650403976, + -0.3731859028339386, + 0.658271849155426, + -0.9023059010505676, + 0.7400162220001221, + 0.014550759457051754, + -0.19699542224407196, + 0.2319706380367279, + -0.789058268070221, + -0.14905710518360138, + -0.5826214551925659, + 0.207652747631073, + -0.4507439732551574, + -0.3163885474205017, + 0.3604124188423157, + -0.45119962096214294, + 0.3428427278995514, + 0.3005594313144684, + -0.36026081442832947, + 1.1014249324798584, + 0.40884315967559814, + 0.34991952776908875, + -0.1806638240814209, + 0.27440476417541504, + -0.7118373513221741, + 0.4645499587059021, + 0.214790478348732, + -0.2343102991580963, + 0.10500429570674896, + -0.28034430742263794, + 1.2267805337905884, + 1.0561333894729614, + -0.497364342212677, + -0.6143305897712708, + 0.24963727593421936, + -0.33136463165283203, + -0.01473914459347725, + 0.495918869972229, + -0.6985538005828857, + -1.0033197402954102, + 0.35937801003456116, + 0.6325868368148804, + -0.6808838844299316, + 1.0354058742523191, + -0.7214401960372925, + -0.33318862318992615, + 0.874398410320282, + -0.6594992280006409, + 0.6830640435218811, + -0.18534131348133087, + 0.024834271520376205, + 0.19901277124881744, + -0.5992477536201477, 
+ -1.2126628160476685, + -0.9245557188987732, + -0.3898217976093292, + -0.1286519467830658, + 0.4217943847179413, + -0.1143646091222763, + 0.5630772709846497, + -0.5240639448165894, + 0.21152715384960177, + -0.3792001008987427, + 0.8266305327415466, + 1.170984387397766, + -0.8072142004966736, + 0.11382893472909927, + -0.17953898012638092, + -0.1789460331201553, + -0.15078622102737427, + -1.2082908153533936, + -0.7812382578849792, + -0.10903695970773696, + 0.7303897142410278, + -0.39054441452026367, + 0.19511254131793976, + -0.09121843427419662, + 0.22400228679180145, + 0.30143046379089355, + 0.1141919493675232, + 0.48112115263938904, + 0.7307931780815125, + 0.09701362252235413, + -0.2795647978782654, + -0.3997688889503479, + 0.5540812611579895, + 0.564578115940094, + -0.40065160393714905, + -0.3629159033298493, + -0.3789091110229492, + -0.7298538088798523, + -0.6996853351593018, + -0.4477842152118683, + -0.289089560508728, + -0.6430277824401855, + 0.2344944179058075, + 0.3742927014827728, + -0.5079357028007507, + 0.28841453790664673, + 0.06515737622976303, + 0.707315981388092, + 0.09498685598373412, + 0.8365515470504761, + 0.10002726316452026, + -0.7695478200912476, + 0.6264724135398865, + 0.7562043070793152, + -0.23112858831882477, + -0.2871039807796478, + -0.25010058283805847, + 0.2783474028110504, + -0.03224996477365494, + -0.9119359850883484, + -3.6940200328826904, + -0.5099936127662659, + -0.1604711413383484, + 0.17453284561634064, + 0.41759559512138367, + 0.1419190913438797, + -0.11362407356500626, + -0.33312007784843445, + 0.11511333286762238, + 0.4667884409427643, + -0.0031647447030991316, + 0.15879854559898376, + 0.3042248487472534, + 0.5404849052429199, + 0.8515422344207764, + 0.06286454200744629, + 0.43790125846862793, + -0.8682025074958801, + -0.06363756954669952, + 0.5547921657562256, + -0.01483887154608965, + -0.07361344993114471, + -0.929947018623352, + 0.3502565622329712, + -0.5080993175506592, + 1.0380364656448364, + -0.2017953395843506, + 
0.21319580078125, + -1.0763001441955566, + -0.556368887424469, + 0.1949922740459442, + -0.6445739269256592, + 0.6791343688964844, + 0.21188358962535855, + 0.3736183941364288, + -0.21800459921360016, + 0.7597446441650391, + -0.3732394874095917, + -0.4710160195827484, + 0.025146087631583217, + 0.05341297015547752, + -0.9522109627723694, + -0.6000866889953613, + -0.08469046652317047, + 0.5966026186943054, + 0.3444081246852875, + -0.461188405752182, + -0.5279349088668823, + 0.10296865552663804, + 0.5175143480300903, + -0.20671147108078003, + 0.13392412662506104, + 0.4812754988670349, + 0.2993808686733246, + -0.3005635440349579, + 0.5141698122024536, + -0.6239235401153564, + 0.2877119481563568, + -0.4452739953994751, + 0.5621107816696167, + 0.5047508478164673, + -0.4226335883140564, + -0.18578553199768064, + -1.1967322826385498, + 0.28178197145462036, + -0.8692031502723694, + -1.1812998056411743, + -1.4526212215423584, + 0.4645712077617645, + 0.9327932000160216, + -0.6560136675834656, + 0.461549699306488, + -0.5621527433395386, + -1.328449010848999, + -0.08676894754171371, + 0.00021918353741057217, + -0.18864136934280396, + 0.1259666532278061, + 0.18240638077259064, + -0.14919660985469818, + -0.8965857625007629, + -0.7539900541305542, + 0.013973715715110302, + 0.504276692867279, + -0.704748272895813, + -0.6428424119949341, + 0.6303996443748474, + -0.5404738187789917, + -0.31176653504371643, + -0.21262824535369873, + 0.18736739456653595, + -0.7998970746994019, + 0.039946746081113815, + 0.7390344738960266, + 0.4283199906349182, + 0.3795057237148285, + 0.07204607129096985, + -0.9230587482452391, + 0.9440426230430604, + 0.26272690296173096, + 0.5598306655883789, + -1.0520871877670288, + -0.2677186131477356, + -0.1888762265443802, + 0.30426350235939026, + 0.4746131896972656, + -0.5746733546257019, + -0.4197768568992615, + 0.8565112948417664, + -0.6767723560333252, + 0.23448683321475983, + -0.2010004222393036, + 0.4112907350063324, + -0.6497949957847595, + -0.418667733669281, 
+ -0.4950824975967407, + 0.44438859820365906, + 1.026281714439392, + 0.482397586107254, + -0.26220494508743286, + -0.3640787005424499, + 0.5907743573188782, + -0.8771642446517944, + 0.09708411991596222, + -0.3671700060367584, + 0.4331349730491638, + 0.619417667388916, + -0.2684665620326996, + -0.5123821496963501, + -0.1502324342727661, + -0.012190685607492924, + 0.3580845892429352, + 0.8617186546325684, + 0.3493645489215851, + 1.0270192623138428, + 0.18297909200191495, + -0.5881339311599731, + -0.1733516901731491, + -0.5040576457977295, + -0.340370237827301, + -0.26767754554748535, + -0.28570041060447693, + -0.032928116619586945, + 0.6029254794120789, + 0.17397655546665192, + 0.09346921741962431, + 0.27815181016921997, + -0.46699589490890503, + -0.8148876428604126, + -0.3964351713657379, + 0.3812595009803772, + 0.13547226786613464, + 0.7126688361167908, + -0.3473474085330963, + -0.06573959439992905, + -0.6483767032623291, + 1.4808889627456665, + 0.30924928188323975, + -0.5085946917533875, + -0.8613000512123108, + 0.3048902451992035, + -0.4241599142551422, + 0.15909206867218018, + 0.5764641761779785, + -0.07879110425710678, + 1.015336513519287, + 0.07599356025457382, + -0.7025855779647827, + 0.30047643184661865, + -0.35094937682151794, + 0.2522146999835968, + -0.2338722199201584, + -0.8326804637908936, + -0.13695412874221802, + -0.03452421352267265, + 0.47974953055381775, + -0.18385636806488037, + 0.32438594102859497, + 0.1797013282775879, + 0.787494957447052, + -0.12579888105392456, + -0.07507286965847015, + -0.4389670491218567, + 0.2720070779323578, + 0.8138866424560547, + 0.01974171027541161, + -0.3057698905467987, + -0.6709924936294556, + 0.0885881632566452, + -0.2862754464149475, + 0.03475658595561981, + -0.1285519152879715, + 0.3838353455066681, + -0.2944154739379883, + -0.4204859137535095, + -0.4416137933731079, + 0.13426260650157928, + 0.36733248829841614, + 0.573428750038147, + -0.14928072690963745, + -0.026076916605234143, + 0.33286052942276, + 
-0.5340145826339722, + -0.17279052734375, + -0.01154550164937973, + -0.6620771884918213, + 0.18390542268753052, + -0.08265615254640579, + -0.2489682286977768, + 0.2429984211921692, + -0.044153645634651184, + -0.986578404903412, + -0.33574509620666504, + -0.5387663841247559, + 0.19767941534519196, + 0.12540718913078308, + -0.3403128981590271, + -0.4154576361179352, + 0.17275673151016235, + 0.09407442808151244, + -0.5414086580276489, + 0.4393929839134216, + 0.1725579798221588, + -0.4998118281364441, + -0.6926208138465881, + 0.16552448272705078, + 0.6659538149833679, + -0.10949844866991044, + 0.986426830291748, + 0.01748848147690296, + 0.4003709554672241, + -0.5430638194084167, + 0.35347291827201843, + 0.6887399554252625, + 0.08274628221988678, + 0.13407137989997864, + -0.591465950012207, + 0.3446292281150818, + 0.6069018244743347, + 0.1935492902994156, + -0.0989871397614479, + 0.07008486241102219, + -0.8503749370574951, + -0.09507356584072112, + 0.6259510517120361, + 0.13934025168418884, + 0.06392545253038406, + -0.4112265408039093, + -0.08475656062364578, + 0.4974113404750824, + -0.30606114864349365, + 1.111435890197754, + -0.018766529858112335, + -0.8422622680664063, + 0.4325508773326874, + -0.2832120656967163, + -0.4859798848628998, + -0.41498348116874695, + 0.015977520495653152, + 0.5292825698852539, + 0.4538311660289765, + 1.1328668594360352, + 0.22632671892642975, + 0.7918671369552612, + 0.33401933312416077, + 0.7306135296821594, + 0.3548600673675537, + 0.12506209313869476, + 0.8573207855224609, + -0.5818327069282532, + -0.6953738927841187, + -1.6171947717666626, + -0.1699674427509308, + 0.6318262815475464, + -0.05671752244234085, + -0.28145185112953186, + -0.3976689279079437, + -0.2041076272726059, + -0.5495951175689697, + -0.5152917504310608, + -0.9309796094894408, + 0.101932130753994, + 0.1367802917957306, + 0.1490798443555832, + 0.5304336547851563, + -0.5082434415817261, + 0.06688683480024338, + 0.14657628536224365, + -0.782435953617096, + 
0.2962816655635834, + 0.6965363621711731, + 0.8496337532997131, + -0.3042965829372406, + 0.04343798756599426, + 0.0330701619386673, + -0.5662598013877869, + 1.1086925268173218, + 0.756072998046875, + -0.204134538769722, + 0.2404300570487976, + -0.47848284244537354, + 1.3659011125564575, + 0.5645433068275452, + -0.15836156904697418, + 0.43395575881004333, + 0.5944653749465942, + 1.0043466091156006, + -0.49446743726730347, + -0.5954391360282898, + 0.5341240763664246, + 0.020598189905285835, + -0.4036853015422821, + 0.4473709762096405, + 1.1998231410980225, + -0.9317775368690492, + -0.23321466147899628, + 0.2052552700042725, + -0.7423108816146851, + -0.19917210936546328, + -0.1722569614648819, + -0.034072667360305786, + -0.00671181408688426, + 0.46396249532699585, + -0.1372445821762085, + 0.053376372903585434, + 0.7392690777778625, + -0.38447609543800354, + 0.07497968524694443, + 0.5197252631187439, + 1.3746477365493774, + 0.9060075879096984, + 0.20000585913658145, + -0.4053704142570496, + 0.7497360110282898, + -0.34087055921554565, + -1.101803183555603, + 0.273650586605072, + -0.5125769376754761, + 0.22472351789474487, + 0.480757474899292, + -0.19845178723335263, + 0.8857700824737549, + 0.30752456188201904, + 1.1109285354614258, + -0.6768012642860413, + 0.524367094039917, + -0.22495046257972717, + -0.4224412739276886, + 0.40753406286239624, + -0.23133376240730288, + 0.3297771215438843, + 0.4905449151992798, + -0.6813114285469055, + -0.7543983459472656, + -0.5599071383476257, + 0.14351597428321838, + -0.029278717935085297, + -0.3970443606376648, + -0.303079217672348, + 0.24161772429943085, + 0.008353390730917454, + -0.0062365154735744, + 1.0824860334396362, + -0.3704061508178711, + -1.0337258577346802, + 0.04638749733567238, + 1.163011074066162, + -0.31737643480300903, + 0.013986887410283089, + 0.19223114848136905, + -0.2260770797729492, + -0.210910826921463, + -1.0191949605941772, + 0.22356095910072327, + 0.09353553503751756, + 0.18096882104873657, + 
0.14867214858531952, + 0.43408671021461487, + -0.33312076330184937, + 0.8173948526382446, + 0.6428242921829224, + 0.20215003192424777, + -0.6634518504142761, + -0.4132290482521057, + 0.29815030097961426, + -1.579406976699829, + -0.0981958732008934, + -0.03941014781594277, + 0.1709178239107132, + -0.5481140613555908, + -0.5338194966316223, + -0.3528362512588501, + -0.11561278253793716, + -0.21793591976165771, + -1.1570470333099363, + 0.2157980799674988, + 0.42083489894866943, + 0.9639263153076172, + 0.09747201204299928, + 0.15671424567699432, + 0.4034591615200043, + 0.6728067994117737, + -0.5216875672340393, + 0.09657668322324751, + -0.2416689097881317, + 0.747975766658783, + 0.1021689772605896, + 0.11652665585279463, + -1.0484966039657593, + 0.8489304780960083, + 0.7169828414916992, + -0.09012343734502792, + -1.3173753023147583, + 0.057890523225069046, + -0.006231260951608419, + -0.1018214002251625, + 0.936040461063385, + -0.0502331368625164, + -0.4284322261810303, + -0.38209280371665955, + -0.22668412327766416, + 0.0782942995429039, + -0.4881664514541626, + 0.9268959760665894, + 0.001867273123934865, + 0.42261114716529846, + 0.8283362984657288, + 0.4256294071674347, + -0.7965338826179504, + 0.4840078353881836, + -0.19861412048339844, + 0.33977967500686646, + -0.4604192078113556, + -0.3107339143753052, + -0.2839638590812683, + -1.5734281539916992, + 0.005220232997089624, + 0.09239906817674635, + -0.7828494906425476, + -0.1397123783826828, + 0.2576255202293396, + 0.21372435986995697, + -0.23169949650764465, + 0.4016408920288086, + -0.462497353553772, + -0.2186472862958908, + -0.5617868900299072, + -0.3649831712245941, + -1.1585862636566162, + -0.08222806453704834, + 0.931126832962036, + 0.4327389597892761, + -0.46451422572135925, + -0.5430706143379211, + -0.27434298396110535, + -0.9479129314422609, + 0.1845661848783493, + 0.3972720205783844, + 0.4883299469947815, + 1.04031240940094 + ] + ], + "regenerate": true + } } } diff --git 
a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap new file mode 100644 index 000000000..4bd0e2c3e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap @@ -0,0 +1,780 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e0", + "desc": "overriden vector", + "_vectors": { + "default": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ] + } +} From 2b120b89e4ab2f981d9c175ed447d7f95d582511 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 12:08:59 +0200 Subject: 
[PATCH 48/60] update the test now that the embedder must be specified --- meilisearch/src/routes/indexes/similar.rs | 2 +- meilisearch/tests/vector/binary_quantized.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 210a52b75..f94a02987 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -103,7 +103,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder, quantized) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 0f3d01c2d..05c1d47ef 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -364,7 +364,8 @@ async fn binary_quantize_clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await; snapshot!(documents, @r###" { "hits": [], From 163f8023a1ada3f633c295138ad46730cb541dbc Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 12:13:25 +0200 Subject: [PATCH 49/60] remove debug println --- milli/src/update/settings.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 63db5237c..fb1710ca9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -425,13 +425,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - println!("inside reindex"); // if the settings are set before 
any document update, we don't need to do anything, and // will set the primary key during the first document addition. if self.index.number_of_documents(self.wtxn)? == 0 { return Ok(()); } - println!("didnt early exit"); let transform = Transform::new( self.wtxn, From 6dde41cc46c74c0c817f551018aea62eff0bee25 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:25:38 +0200 Subject: [PATCH 50/60] stop using a local version of arroy and instead point to the git repo with the rev --- Cargo.lock | 24 +++--------------------- index-scheduler/Cargo.toml | 2 +- milli/Cargo.toml | 3 +-- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 485ab1305..992b4b536 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -387,6 +387,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arroy" version = "0.4.0" +source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d" dependencies = [ "bytemuck", "byteorder", @@ -402,25 +403,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "arroy" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" -dependencies = [ - "bytemuck", - "byteorder", - "heed", - "log", - "memmap2", - "ordered-float", - "rand", - "rayon", - "roaring", - "tempfile", - "thiserror", -] - [[package]] name = "assert-json-diff" version = "2.0.2" @@ -2573,7 +2555,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "arroy", "big_s", "bincode", "crossbeam", @@ -3563,7 +3545,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ - "arroy 0.4.0", + "arroy", "big_s", "bimap", "bincode", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 6f099a025..432a86382 
100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } [dev-dependencies] -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.39.0", features = ["json", "redactions"] } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 4d82d0a03..e45554898 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -80,8 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -# arroy = "0.4.0" -arroy = { path = "../../arroy" } +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } From b6b73fe41ccd4574bbc6575c40d487f95f8759e3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:41:14 +0200 Subject: [PATCH 51/60] Update milli/src/update/settings.rs Co-authored-by: Louis Dureuil --- milli/src/update/settings.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index fb1710ca9..57459c708 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1095,6 +1095,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { + // ignore actions that are not possible for a new embedder if matches!(action.reindex(), Some(ReindexAction::FullReindex)) && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { From 2564ec1496f1b81da396484c67be08d57b55da9b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 
15:41:44 +0200 Subject: [PATCH 52/60] Update milli/src/index.rs Co-authored-by: Louis Dureuil --- milli/src/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 2dd6c6541..51547c4de 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1619,7 +1619,7 @@ impl Index { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); // Here we don't care about the dimensions, but we want to know if we can read - // in the database or if its medata are missing. + // in the database or if its metadata are missing because there is no document with that many vectors. match reader.dimensions(rtxn) { Ok(_) => Some(Ok(reader)), Err(arroy::Error::MissingMetadata(_)) => None, From fd43c6c4044085a37b2a2820bf0101eff1fe8b2e Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:51:29 +0200 Subject: [PATCH 53/60] Improve the error message explaining you can't un-bq an embedder --- meilisearch/tests/vector/binary_quantized.rs | 2 +- milli/src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 05c1d47ef..d3fe3c824 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -317,7 +317,7 @@ async fn try_to_disable_binary_quantization() { } }, "error": { - "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization", + "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/milli/src/error.rs 
b/milli/src/error.rs index f09f48c2e..400d3d3be 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -259,7 +259,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, #[error( - "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization" + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors." )] InvalidDisableBinaryQuantization { embedder_name: String }, #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] From b8fd85a46d4f5034b32805a0e59653cffbe2dd58 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:57:38 +0200 Subject: [PATCH 54/60] Get rids of useless collect before an iteration on the readers --- milli/src/search/new/vector_sort.rs | 7 ++----- milli/src/search/similar.rs | 8 ++------ 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 47480c315..de1dacbe7 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -52,15 +52,12 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: Result> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized).collect(); - let target = &self.target; let mut results = Vec::new(); - for reader in readers?.iter() { + for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { let nns_by_vector = - reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; + reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; 
results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index ac56e10fa..a9072f723 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -70,14 +70,10 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: Result> = - self.index.arroy_readers(self.rtxn, embedder_index, self.quantized).collect(); - let readers = readers?; - let mut results = Vec::new(); - for reader in readers.iter() { - let nns_by_item = reader.nns_by_item( + for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { + let nns_by_item = reader?.nns_by_item( self.rtxn, self.id, self.limit + self.offset + 1, From f77661ec44f48eddc4fe7f4538815322e363b0c4 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 16:08:59 +0200 Subject: [PATCH 55/60] Update Charabia v0.9.1 --- Cargo.lock | 6 +++--- meilisearch-types/src/locales.rs | 11 ++++++++--- meilisearch/tests/search/locales.rs | 6 +++--- milli/Cargo.toml | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1af89d382..922ec606d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -933,9 +933,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d" +checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798" dependencies = [ "aho-corasick", "csv", @@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] diff --git 
a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index 36c45aac3..8d746779e 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -39,12 +39,14 @@ macro_rules! make_locale { pub enum Locale { $($iso_639_1,)+ $($iso_639_3,)+ + Cmn, } impl From for Locale { fn from(other: milli::tokenizer::Language) -> Locale { match other { $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + milli::tokenizer::Language::Cmn => Locale::Cmn, } } } @@ -54,6 +56,7 @@ macro_rules! make_locale { match other { $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + Locale::Cmn => milli::tokenizer::Language::Cmn, } } } @@ -65,6 +68,7 @@ macro_rules! make_locale { let locale = match s { $($iso_639_1_str => Locale::$iso_639_1,)+ $($iso_639_3_str => Locale::$iso_639_3,)+ + "cmn" => Locale::Cmn, _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), }; @@ -79,8 +83,9 @@ macro_rules! 
make_locale { impl std::fmt::Display for LocaleFormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"]; + valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right))); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", ")) } } @@ -99,7 +104,6 @@ make_locale!( (Bg, "bg") => (Bul, "bul"), (Ca, "ca") => (Cat, "cat"), (Cs, "cs") => (Ces, "ces"), - (Zh, "zh") => (Cmn, "cmn"), (Da, "da") => (Dan, "dan"), (De, "de") => (Deu, "deu"), (El, "el") => (Ell, "ell"), @@ -157,5 +161,6 @@ make_locale!( (Uz, "uz") => (Uzb, "uzb"), (Vi, "vi") => (Vie, "vie"), (Yi, "yi") => (Yid, "yid"), + (Zh, "zh") => (Zho, "zho"), (Zu, "zu") => (Zul, "zul"), ); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 53bcece06..408a01b0b 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -922,7 +922,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, 
`jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -935,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, 
guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, 
`nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 8a5ba366f..e0a85ea8f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.9.1" bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.9.0", default-features = false } +charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.13" deserr = "0.6.2" From 7d6768e4c4841cca4f01c098b9829c63a6ed1377 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 13:30:07 +0200 Subject: [PATCH 56/60] Add german tokenization 
pipeline --- meilisearch-types/Cargo.toml | 2 ++ meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 73306c4dc..cb4937e57 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -66,3 +66,5 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] +# force german character recomposition +german = ["milli/german"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index e614ecc6a..2a16e1017 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] +german = ["meilisearch-types/german"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index e0a85ea8f..8c77f338c 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -107,6 +107,7 @@ all-tokenizations = [ "charabia/khmer", "charabia/vietnamese", "charabia/swedish-recomposition", + "charabia/german-segmentation", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -139,6 +140,9 @@ khmer = ["charabia/khmer"] # allow vietnamese specialized tokenization vietnamese = ["charabia/vietnamese"] +# allow german specialized tokenization +german = ["charabia/german-segmentation"] + # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] From 465afe01b2e48e351cff7bd41fe7a65549958eaf Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 13:41:57 +0200 Subject: [PATCH 57/60] Add test for German --- meilisearch/tests/search/locales.rs | 70 +++++++++++++++++++++++++++++ 1 file changed, 70 
insertions(+) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 408a01b0b..c01d854e2 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1143,6 +1143,7 @@ async fn facet_search_with_localized_attributes() { } "###); } + #[actix_rt::test] async fn swedish_search() { let server = Server::new().await; @@ -1265,3 +1266,72 @@ async fn swedish_search() { ) .await; } + +#[actix_rt::test] +async fn german_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": 1, "product": "Interkulturalität"}, + {"id": 2, "product": "Wissensorganisation"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force german + {"attributePatterns": ["product"], "locales": ["deu"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer german + index + .search( + json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Interkulturalität" + } + ], + "query": "kulturalität", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + index + .search( + json!({"q": "organisation", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Wissensorganisation" + } + ], + "query": "organisation", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} From 1113c42de07ef13961331b79d6ca72ba6b06dbdf Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:18:36 +0200 Subject: [PATCH 58/60] fix broken comments --- milli/src/update/settings.rs | 8 ++++---- 1 file
changed, 4 insertions(+), 4 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 57459c708..6e2b53d58 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1289,7 +1289,7 @@ impl InnerIndexSettingsDiff { new_settings.embedding_configs.inner_as_ref() { let was_quantized = - old_settings.embedding_configs.get(&embedder_name).map_or(false, |conf| conf.2); + old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2); // skip embedders that don't use document templates if !config.uses_document_template() { continue; @@ -1307,9 +1307,9 @@ impl InnerIndexSettingsDiff { std::collections::btree_map::Entry::Occupied(entry) => { let EmbedderAction { was_quantized: _, - is_being_quantized: _, // We are deleting this embedder, so no point in regeneration - write_back: _, // We are already fully reindexing - reindex: _, // We are already regenerating prompts + is_being_quantized: _, + write_back: _, // We are deleting this embedder, so no point in regeneration + reindex: _, // We are already fully reindexing } = entry.get(); } }; From 74199f328d91f1aacf7ccf51a56b6f6f3cb519b8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:27:34 +0200 Subject: [PATCH 59/60] Make clippy happy --- milli/src/search/new/mod.rs | 1 + milli/src/search/similar.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 4babc7acc..f7c590360 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( Ok(ranking_rules) } +#[allow(clippy::too_many_arguments)] fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index a9072f723..0cb8d723d 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -22,6 +22,7 @@ pub struct Similar<'a> { } 
impl<'a> Similar<'a> { + #[allow(clippy::too_many_arguments)] pub fn new( id: DocumentId, offset: usize, From f6483cf15db57eb3b6b72762b7ddf17f9054356f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:47:06 +0200 Subject: [PATCH 60/60] apply review comment --- milli/src/index.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 51547c4de..c47896df7 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1648,25 +1648,15 @@ impl Index { let mut res = BTreeMap::new(); let embedding_configs = self.embedding_configs(rtxn)?; for config in embedding_configs { - // TODO: return internal error instead let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); - let embedder_id = (embedder_id as u16) << 8; - - let mut embeddings = Vec::new(); - 'vectors: for i in 0..=u8::MAX { - let reader = ArroyWrapper::new( - self.vector_arroy, - embedder_id | (i as u16), - config.config.quantized(), - ); - match reader.item_vector(rtxn, docid) { - Err(arroy::Error::MissingMetadata(_)) => break 'vectors, - Err(err) => return Err(err.into()), - Ok(None) => break 'vectors, - Ok(Some(embedding)) => embeddings.push(embedding), - }; - } - + let embeddings = self + .arroy_readers(rtxn, embedder_id, config.config.quantized()) + .map_while(|reader| { + reader + .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) + .transpose() + }) + .collect::>>()?; res.insert(config.name.to_owned(), embeddings); } Ok(res)