mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Merge #4647
4647: Feature: get similar documents r=dureuill a=dureuill # Pull Request ## Related issue Fixes #4610 ## What does this PR do? [Usage](https://meilisearch.notion.site/Get-similar-documents-usage-540919ca755c4da0b7cdee273db3f290) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
75d5c0ae1f
@ -189,3 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
|
|||||||
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
merge_with_error_impl_take_error_message!(ParseTaskStatusError);
|
||||||
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
merge_with_error_impl_take_error_message!(IndexUidFormatError);
|
||||||
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
|
||||||
|
merge_with_error_impl_take_error_message!(InvalidSimilarId);
|
||||||
|
@ -239,18 +239,23 @@ InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarFilter , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarLimit , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchLimit , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarOffset , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
|
||||||
@ -259,7 +264,9 @@ InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
|
|||||||
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchVector , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||||
|
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||||
@ -322,7 +329,8 @@ UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
|
|||||||
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||||
|
|
||||||
// Experimental features
|
// Experimental features
|
||||||
VectorEmbeddingError , InvalidRequest , BAD_REQUEST
|
VectorEmbeddingError , InvalidRequest , BAD_REQUEST ;
|
||||||
|
NotFoundSimilarId , InvalidRequest , BAD_REQUEST
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ErrorCode for JoinError {
|
impl ErrorCode for JoinError {
|
||||||
@ -486,6 +494,17 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for deserr_codes::InvalidSimilarId {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"the value of `id` is invalid. \
|
||||||
|
A document identifier can be of type integer or string, \
|
||||||
|
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! internal_error {
|
macro_rules! internal_error {
|
||||||
($target:ty : $($other:path), *) => {
|
($target:ty : $($other:path), *) => {
|
||||||
|
@ -25,6 +25,18 @@ impl SearchAggregator {
|
|||||||
pub fn succeed(&mut self, _: &dyn Any) {}
|
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct SimilarAggregator;
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
impl SimilarAggregator {
|
||||||
|
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
|
||||||
|
Self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct MultiSearchAggregator;
|
pub struct MultiSearchAggregator;
|
||||||
|
|
||||||
@ -66,6 +78,8 @@ impl Analytics for MockAnalytics {
|
|||||||
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
|
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
|
||||||
fn get_search(&self, _aggregate: super::SearchAggregator) {}
|
fn get_search(&self, _aggregate: super::SearchAggregator) {}
|
||||||
fn post_search(&self, _aggregate: super::SearchAggregator) {}
|
fn post_search(&self, _aggregate: super::SearchAggregator) {}
|
||||||
|
fn get_similar(&self, _aggregate: super::SimilarAggregator) {}
|
||||||
|
fn post_similar(&self, _aggregate: super::SimilarAggregator) {}
|
||||||
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
|
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
|
||||||
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
|
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
|
||||||
fn add_documents(
|
fn add_documents(
|
||||||
|
@ -22,6 +22,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
|
|||||||
#[cfg(not(feature = "analytics"))]
|
#[cfg(not(feature = "analytics"))]
|
||||||
pub type SearchAggregator = mock_analytics::SearchAggregator;
|
pub type SearchAggregator = mock_analytics::SearchAggregator;
|
||||||
#[cfg(not(feature = "analytics"))]
|
#[cfg(not(feature = "analytics"))]
|
||||||
|
pub type SimilarAggregator = mock_analytics::SimilarAggregator;
|
||||||
|
#[cfg(not(feature = "analytics"))]
|
||||||
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
|
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
|
||||||
#[cfg(not(feature = "analytics"))]
|
#[cfg(not(feature = "analytics"))]
|
||||||
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
|
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
|
||||||
@ -32,6 +34,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
|
|||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
pub type SearchAggregator = segment_analytics::SearchAggregator;
|
pub type SearchAggregator = segment_analytics::SearchAggregator;
|
||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
|
pub type SimilarAggregator = segment_analytics::SimilarAggregator;
|
||||||
|
#[cfg(feature = "analytics")]
|
||||||
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
|
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
|
||||||
#[cfg(feature = "analytics")]
|
#[cfg(feature = "analytics")]
|
||||||
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
|
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
|
||||||
@ -86,6 +90,12 @@ pub trait Analytics: Sync + Send {
|
|||||||
/// This method should be called to aggregate a post search
|
/// This method should be called to aggregate a post search
|
||||||
fn post_search(&self, aggregate: SearchAggregator);
|
fn post_search(&self, aggregate: SearchAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate a get similar request
|
||||||
|
fn get_similar(&self, aggregate: SimilarAggregator);
|
||||||
|
|
||||||
|
/// This method should be called to aggregate a post similar request
|
||||||
|
fn post_similar(&self, aggregate: SimilarAggregator);
|
||||||
|
|
||||||
/// This method should be called to aggregate a post array of searches
|
/// This method should be called to aggregate a post array of searches
|
||||||
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
|
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
|
||||||
|
|
||||||
|
@ -36,8 +36,9 @@ use crate::routes::indexes::facet_search::FacetSearchQuery;
|
|||||||
use crate::routes::{create_all_stats, Stats};
|
use crate::routes::{create_all_stats, Stats};
|
||||||
use crate::search::{
|
use crate::search::{
|
||||||
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
|
||||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
|
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||||
|
DEFAULT_SEMANTIC_RATIO,
|
||||||
};
|
};
|
||||||
use crate::Opt;
|
use crate::Opt;
|
||||||
|
|
||||||
@ -73,6 +74,8 @@ pub enum AnalyticsMsg {
|
|||||||
BatchMessage(Track),
|
BatchMessage(Track),
|
||||||
AggregateGetSearch(SearchAggregator),
|
AggregateGetSearch(SearchAggregator),
|
||||||
AggregatePostSearch(SearchAggregator),
|
AggregatePostSearch(SearchAggregator),
|
||||||
|
AggregateGetSimilar(SimilarAggregator),
|
||||||
|
AggregatePostSimilar(SimilarAggregator),
|
||||||
AggregatePostMultiSearch(MultiSearchAggregator),
|
AggregatePostMultiSearch(MultiSearchAggregator),
|
||||||
AggregatePostFacetSearch(FacetSearchAggregator),
|
AggregatePostFacetSearch(FacetSearchAggregator),
|
||||||
AggregateAddDocuments(DocumentsAggregator),
|
AggregateAddDocuments(DocumentsAggregator),
|
||||||
@ -149,6 +152,8 @@ impl SegmentAnalytics {
|
|||||||
update_documents_aggregator: DocumentsAggregator::default(),
|
update_documents_aggregator: DocumentsAggregator::default(),
|
||||||
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
||||||
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
|
||||||
|
get_similar_aggregator: SimilarAggregator::default(),
|
||||||
|
post_similar_aggregator: SimilarAggregator::default(),
|
||||||
});
|
});
|
||||||
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
|
tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));
|
||||||
|
|
||||||
@ -184,6 +189,14 @@ impl super::Analytics for SegmentAnalytics {
|
|||||||
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_similar(&self, aggregate: SimilarAggregator) {
|
||||||
|
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn post_similar(&self, aggregate: SimilarAggregator) {
|
||||||
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate));
|
||||||
|
}
|
||||||
|
|
||||||
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
|
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
|
||||||
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
|
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
|
||||||
}
|
}
|
||||||
@ -379,6 +392,8 @@ pub struct Segment {
|
|||||||
update_documents_aggregator: DocumentsAggregator,
|
update_documents_aggregator: DocumentsAggregator,
|
||||||
get_fetch_documents_aggregator: DocumentsFetchAggregator,
|
get_fetch_documents_aggregator: DocumentsFetchAggregator,
|
||||||
post_fetch_documents_aggregator: DocumentsFetchAggregator,
|
post_fetch_documents_aggregator: DocumentsFetchAggregator,
|
||||||
|
get_similar_aggregator: SimilarAggregator,
|
||||||
|
post_similar_aggregator: SimilarAggregator,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Segment {
|
impl Segment {
|
||||||
@ -441,6 +456,8 @@ impl Segment {
|
|||||||
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
|
||||||
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
|
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
|
||||||
|
Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg),
|
||||||
|
Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg),
|
||||||
None => (),
|
None => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -494,6 +511,8 @@ impl Segment {
|
|||||||
update_documents_aggregator,
|
update_documents_aggregator,
|
||||||
get_fetch_documents_aggregator,
|
get_fetch_documents_aggregator,
|
||||||
post_fetch_documents_aggregator,
|
post_fetch_documents_aggregator,
|
||||||
|
get_similar_aggregator,
|
||||||
|
post_similar_aggregator,
|
||||||
} = self;
|
} = self;
|
||||||
|
|
||||||
if let Some(get_search) =
|
if let Some(get_search) =
|
||||||
@ -541,6 +560,18 @@ impl Segment {
|
|||||||
{
|
{
|
||||||
let _ = self.batcher.push(post_fetch_documents).await;
|
let _ = self.batcher.push(post_fetch_documents).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(get_similar_documents) =
|
||||||
|
take(get_similar_aggregator).into_event(user, "Similar GET")
|
||||||
|
{
|
||||||
|
let _ = self.batcher.push(get_similar_documents).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(post_similar_documents) =
|
||||||
|
take(post_similar_aggregator).into_event(user, "Similar POST")
|
||||||
|
{
|
||||||
|
let _ = self.batcher.push(post_similar_documents).await;
|
||||||
|
}
|
||||||
let _ = self.batcher.flush().await;
|
let _ = self.batcher.flush().await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1558,3 +1589,235 @@ impl DocumentsFetchAggregator {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct SimilarAggregator {
|
||||||
|
timestamp: Option<OffsetDateTime>,
|
||||||
|
|
||||||
|
// context
|
||||||
|
user_agents: HashSet<String>,
|
||||||
|
|
||||||
|
// requests
|
||||||
|
total_received: usize,
|
||||||
|
total_succeeded: usize,
|
||||||
|
time_spent: BinaryHeap<usize>,
|
||||||
|
|
||||||
|
// filter
|
||||||
|
filter_with_geo_radius: bool,
|
||||||
|
filter_with_geo_bounding_box: bool,
|
||||||
|
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
||||||
|
filter_sum_of_criteria_terms: usize,
|
||||||
|
// every time a request has a filter, this field must be incremented by one
|
||||||
|
filter_total_number_of_criteria: usize,
|
||||||
|
used_syntax: HashMap<String, usize>,
|
||||||
|
|
||||||
|
// Whether a non-default embedder was specified
|
||||||
|
embedder: bool,
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
max_limit: usize,
|
||||||
|
max_offset: usize,
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
max_attributes_to_retrieve: usize,
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
show_ranking_score: bool,
|
||||||
|
show_ranking_score_details: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SimilarAggregator {
|
||||||
|
#[allow(clippy::field_reassign_with_default)]
|
||||||
|
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
|
||||||
|
let SimilarQuery {
|
||||||
|
id: _,
|
||||||
|
embedder,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
attributes_to_retrieve: _,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
filter,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
let mut ret = Self::default();
|
||||||
|
ret.timestamp = Some(OffsetDateTime::now_utc());
|
||||||
|
|
||||||
|
ret.total_received = 1;
|
||||||
|
ret.user_agents = extract_user_agents(request).into_iter().collect();
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
|
||||||
|
ret.filter_total_number_of_criteria = 1;
|
||||||
|
|
||||||
|
let syntax = match filter {
|
||||||
|
Value::String(_) => "string".to_string(),
|
||||||
|
Value::Array(values) => {
|
||||||
|
if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) {
|
||||||
|
"mixed".to_string()
|
||||||
|
} else {
|
||||||
|
"array".to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => "none".to_string(),
|
||||||
|
};
|
||||||
|
// convert the string to a HashMap
|
||||||
|
ret.used_syntax.insert(syntax, 1);
|
||||||
|
|
||||||
|
let stringified_filters = filter.to_string();
|
||||||
|
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
||||||
|
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
||||||
|
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.max_limit = *limit;
|
||||||
|
ret.max_offset = *offset;
|
||||||
|
|
||||||
|
ret.show_ranking_score = *show_ranking_score;
|
||||||
|
ret.show_ranking_score_details = *show_ranking_score_details;
|
||||||
|
|
||||||
|
ret.embedder = embedder.is_some();
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn succeed(&mut self, result: &SimilarResult) {
|
||||||
|
let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result;
|
||||||
|
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(1);
|
||||||
|
|
||||||
|
self.time_spent.push(*processing_time_ms as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Aggregate one [SimilarAggregator] into another.
|
||||||
|
pub fn aggregate(&mut self, mut other: Self) {
|
||||||
|
let Self {
|
||||||
|
timestamp,
|
||||||
|
user_agents,
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
ref mut time_spent,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
embedder,
|
||||||
|
} = other;
|
||||||
|
|
||||||
|
if self.timestamp.is_none() {
|
||||||
|
self.timestamp = timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// context
|
||||||
|
for user_agent in user_agents.into_iter() {
|
||||||
|
self.user_agents.insert(user_agent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// request
|
||||||
|
self.total_received = self.total_received.saturating_add(total_received);
|
||||||
|
self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
|
||||||
|
self.time_spent.append(time_spent);
|
||||||
|
|
||||||
|
// filter
|
||||||
|
self.filter_with_geo_radius |= filter_with_geo_radius;
|
||||||
|
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
||||||
|
self.filter_sum_of_criteria_terms =
|
||||||
|
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
||||||
|
self.filter_total_number_of_criteria =
|
||||||
|
self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria);
|
||||||
|
for (key, value) in used_syntax.into_iter() {
|
||||||
|
let used_syntax = self.used_syntax.entry(key).or_insert(0);
|
||||||
|
*used_syntax = used_syntax.saturating_add(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.embedder |= embedder;
|
||||||
|
|
||||||
|
// pagination
|
||||||
|
self.max_limit = self.max_limit.max(max_limit);
|
||||||
|
self.max_offset = self.max_offset.max(max_offset);
|
||||||
|
|
||||||
|
// formatting
|
||||||
|
self.max_attributes_to_retrieve =
|
||||||
|
self.max_attributes_to_retrieve.max(max_attributes_to_retrieve);
|
||||||
|
|
||||||
|
// scoring
|
||||||
|
self.show_ranking_score |= show_ranking_score;
|
||||||
|
self.show_ranking_score_details |= show_ranking_score_details;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||||
|
let Self {
|
||||||
|
timestamp,
|
||||||
|
user_agents,
|
||||||
|
total_received,
|
||||||
|
total_succeeded,
|
||||||
|
time_spent,
|
||||||
|
filter_with_geo_radius,
|
||||||
|
filter_with_geo_bounding_box,
|
||||||
|
filter_sum_of_criteria_terms,
|
||||||
|
filter_total_number_of_criteria,
|
||||||
|
used_syntax,
|
||||||
|
max_limit,
|
||||||
|
max_offset,
|
||||||
|
max_attributes_to_retrieve,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
embedder,
|
||||||
|
} = self;
|
||||||
|
|
||||||
|
if total_received == 0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
// we get all the values in a sorted manner
|
||||||
|
let time_spent = time_spent.into_sorted_vec();
|
||||||
|
// the index of the 99th percentage of value
|
||||||
|
let percentile_99th = time_spent.len() * 99 / 100;
|
||||||
|
// We are only interested by the slowest value of the 99th fastest results
|
||||||
|
let time_spent = time_spent.get(percentile_99th);
|
||||||
|
|
||||||
|
let properties = json!({
|
||||||
|
"user-agent": user_agents,
|
||||||
|
"requests": {
|
||||||
|
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
|
||||||
|
"total_succeeded": total_succeeded,
|
||||||
|
"total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
|
||||||
|
"total_received": total_received,
|
||||||
|
},
|
||||||
|
"filter": {
|
||||||
|
"with_geoRadius": filter_with_geo_radius,
|
||||||
|
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
||||||
|
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
||||||
|
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
|
},
|
||||||
|
"hybrid": {
|
||||||
|
"embedder": embedder,
|
||||||
|
},
|
||||||
|
"pagination": {
|
||||||
|
"max_limit": max_limit,
|
||||||
|
"max_offset": max_offset,
|
||||||
|
},
|
||||||
|
"formatting": {
|
||||||
|
"max_attributes_to_retrieve": max_attributes_to_retrieve,
|
||||||
|
},
|
||||||
|
"scoring": {
|
||||||
|
"show_ranking_score": show_ranking_score,
|
||||||
|
"show_ranking_score_details": show_ranking_score_details,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
Some(Track {
|
||||||
|
timestamp,
|
||||||
|
user: user.clone(),
|
||||||
|
event: event_name.to_string(),
|
||||||
|
properties,
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -69,7 +69,7 @@ pub async fn search(
|
|||||||
|
|
||||||
// Tenant token search_rules.
|
// Tenant token search_rules.
|
||||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||||
add_search_rules(&mut search_query, search_rules);
|
add_search_rules(&mut search_query.filter, search_rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
let index = index_scheduler.index(&index_uid)?;
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
|
@ -29,6 +29,7 @@ pub mod documents;
|
|||||||
pub mod facet_search;
|
pub mod facet_search;
|
||||||
pub mod search;
|
pub mod search;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
|
pub mod similar;
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(
|
cfg.service(
|
||||||
@ -48,6 +49,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||||||
.service(web::scope("/documents").configure(documents::configure))
|
.service(web::scope("/documents").configure(documents::configure))
|
||||||
.service(web::scope("/search").configure(search::configure))
|
.service(web::scope("/search").configure(search::configure))
|
||||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||||
|
.service(web::scope("/similar").configure(similar::configure))
|
||||||
.service(web::scope("/settings").configure(settings::configure)),
|
.service(web::scope("/settings").configure(settings::configure)),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -196,7 +196,7 @@ pub async fn search_with_url_query(
|
|||||||
|
|
||||||
// Tenant token search_rules.
|
// Tenant token search_rules.
|
||||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||||
add_search_rules(&mut query, search_rules);
|
add_search_rules(&mut query.filter, search_rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||||
@ -235,7 +235,7 @@ pub async fn search_with_post(
|
|||||||
|
|
||||||
// Tenant token search_rules.
|
// Tenant token search_rules.
|
||||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||||
add_search_rules(&mut query, search_rules);
|
add_search_rules(&mut query.filter, search_rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||||
|
171
meilisearch/src/routes/indexes/similar.rs
Normal file
171
meilisearch/src/routes/indexes/similar.rs
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
use actix_web::web::{self, Data};
|
||||||
|
use actix_web::{HttpRequest, HttpResponse};
|
||||||
|
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||||
|
use index_scheduler::IndexScheduler;
|
||||||
|
use meilisearch_types::deserr::query_params::Param;
|
||||||
|
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||||
|
use meilisearch_types::error::deserr_codes::{
|
||||||
|
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
|
||||||
|
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
|
||||||
|
InvalidSimilarShowRankingScoreDetails,
|
||||||
|
};
|
||||||
|
use meilisearch_types::error::{ErrorCode as _, ResponseError};
|
||||||
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
|
use meilisearch_types::keys::actions;
|
||||||
|
use meilisearch_types::serde_cs::vec::CS;
|
||||||
|
use serde_json::Value;
|
||||||
|
use tracing::debug;
|
||||||
|
|
||||||
|
use super::ActionPolicy;
|
||||||
|
use crate::analytics::{Analytics, SimilarAggregator};
|
||||||
|
use crate::extractors::authentication::GuardedData;
|
||||||
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
use crate::search::{
|
||||||
|
add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
|
||||||
|
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
|
cfg.service(
|
||||||
|
web::resource("")
|
||||||
|
.route(web::get().to(SeqHandler(similar_get)))
|
||||||
|
.route(web::post().to(SeqHandler(similar_post))),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn similar_get(
|
||||||
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||||
|
index_uid: web::Path<String>,
|
||||||
|
params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>,
|
||||||
|
req: HttpRequest,
|
||||||
|
analytics: web::Data<dyn Analytics>,
|
||||||
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
|
|
||||||
|
let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
|
||||||
|
ResponseError::from_msg(code.to_string(), code.error_code())
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let mut aggregate = SimilarAggregator::from_query(&query, &req);
|
||||||
|
|
||||||
|
debug!(parameters = ?query, "Similar get");
|
||||||
|
|
||||||
|
let similar = similar(index_scheduler, index_uid, query).await;
|
||||||
|
|
||||||
|
if let Ok(similar) = &similar {
|
||||||
|
aggregate.succeed(similar);
|
||||||
|
}
|
||||||
|
analytics.get_similar(aggregate);
|
||||||
|
|
||||||
|
let similar = similar?;
|
||||||
|
|
||||||
|
debug!(returns = ?similar, "Similar get");
|
||||||
|
Ok(HttpResponse::Ok().json(similar))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn similar_post(
|
||||||
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||||
|
index_uid: web::Path<String>,
|
||||||
|
params: AwebJson<SimilarQuery, DeserrJsonError>,
|
||||||
|
req: HttpRequest,
|
||||||
|
analytics: web::Data<dyn Analytics>,
|
||||||
|
) -> Result<HttpResponse, ResponseError> {
|
||||||
|
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||||
|
|
||||||
|
let query = params.into_inner();
|
||||||
|
debug!(parameters = ?query, "Similar post");
|
||||||
|
|
||||||
|
let mut aggregate = SimilarAggregator::from_query(&query, &req);
|
||||||
|
|
||||||
|
let similar = similar(index_scheduler, index_uid, query).await;
|
||||||
|
|
||||||
|
if let Ok(similar) = &similar {
|
||||||
|
aggregate.succeed(similar);
|
||||||
|
}
|
||||||
|
analytics.post_similar(aggregate);
|
||||||
|
|
||||||
|
let similar = similar?;
|
||||||
|
|
||||||
|
debug!(returns = ?similar, "Similar post");
|
||||||
|
Ok(HttpResponse::Ok().json(similar))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn similar(
|
||||||
|
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||||
|
index_uid: IndexUid,
|
||||||
|
mut query: SimilarQuery,
|
||||||
|
) -> Result<SimilarResult, ResponseError> {
|
||||||
|
let features = index_scheduler.features();
|
||||||
|
|
||||||
|
features.check_vector("Using the similar API")?;
|
||||||
|
|
||||||
|
// Tenant token search_rules.
|
||||||
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||||
|
add_search_rules(&mut query.filter, search_rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = index_scheduler.index(&index_uid)?;
|
||||||
|
|
||||||
|
let (embedder_name, embedder) =
|
||||||
|
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
|
||||||
|
.await?
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, deserr::Deserr)]
|
||||||
|
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct SimilarQueryGet {
|
||||||
|
#[deserr(error = DeserrQueryParamError<InvalidSimilarId>)]
|
||||||
|
id: Param<String>,
|
||||||
|
#[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSimilarOffset>)]
|
||||||
|
offset: Param<usize>,
|
||||||
|
#[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSimilarLimit>)]
|
||||||
|
limit: Param<usize>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarAttributesToRetrieve>)]
|
||||||
|
attributes_to_retrieve: Option<CS<String>>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarFilter>)]
|
||||||
|
filter: Option<String>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScore>)]
|
||||||
|
show_ranking_score: Param<bool>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
|
||||||
|
show_ranking_score_details: Param<bool>,
|
||||||
|
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
|
||||||
|
pub embedder: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<SimilarQueryGet> for SimilarQuery {
|
||||||
|
type Error = InvalidSimilarId;
|
||||||
|
|
||||||
|
fn try_from(
|
||||||
|
SimilarQueryGet {
|
||||||
|
id,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
attributes_to_retrieve,
|
||||||
|
filter,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
embedder,
|
||||||
|
}: SimilarQueryGet,
|
||||||
|
) -> Result<Self, Self::Error> {
|
||||||
|
let filter = match filter {
|
||||||
|
Some(f) => match serde_json::from_str(&f) {
|
||||||
|
Ok(v) => Some(v),
|
||||||
|
_ => Some(Value::String(f)),
|
||||||
|
},
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(SimilarQuery {
|
||||||
|
id: id.0.try_into()?,
|
||||||
|
offset: offset.0,
|
||||||
|
limit: limit.0,
|
||||||
|
filter,
|
||||||
|
embedder,
|
||||||
|
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
|
||||||
|
show_ranking_score: show_ranking_score.0,
|
||||||
|
show_ranking_score_details: show_ranking_score_details.0,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -67,7 +67,7 @@ pub async fn multi_search_with_post(
|
|||||||
// Apply search rules from tenant token
|
// Apply search rules from tenant token
|
||||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
|
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
|
||||||
{
|
{
|
||||||
add_search_rules(&mut query, search_rules);
|
add_search_rules(&mut query.filter, search_rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
let index = index_scheduler
|
let index = index_scheduler
|
||||||
|
@ -11,7 +11,7 @@ use indexmap::IndexMap;
|
|||||||
use meilisearch_auth::IndexSearchRules;
|
use meilisearch_auth::IndexSearchRules;
|
||||||
use meilisearch_types::deserr::DeserrJsonError;
|
use meilisearch_types::deserr::DeserrJsonError;
|
||||||
use meilisearch_types::error::deserr_codes::*;
|
use meilisearch_types::error::deserr_codes::*;
|
||||||
use meilisearch_types::error::ResponseError;
|
use meilisearch_types::error::{Code, ResponseError};
|
||||||
use meilisearch_types::heed::RoTxn;
|
use meilisearch_types::heed::RoTxn;
|
||||||
use meilisearch_types::index_uid::IndexUid;
|
use meilisearch_types::index_uid::IndexUid;
|
||||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
@ -231,7 +231,7 @@ impl SearchKind {
|
|||||||
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
|
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn embedder(
|
pub(crate) fn embedder(
|
||||||
index_scheduler: &index_scheduler::IndexScheduler,
|
index_scheduler: &index_scheduler::IndexScheduler,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
embedder_name: Option<&str>,
|
embedder_name: Option<&str>,
|
||||||
@ -417,6 +417,59 @@ impl SearchQueryWithIndex {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Deserr)]
|
||||||
|
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||||
|
pub struct SimilarQuery {
|
||||||
|
#[deserr(error = DeserrJsonError<InvalidSimilarId>)]
|
||||||
|
pub id: ExternalDocumentId,
|
||||||
|
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
|
||||||
|
pub offset: usize,
|
||||||
|
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
|
||||||
|
pub limit: usize,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
|
||||||
|
pub filter: Option<Value>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
|
||||||
|
pub embedder: Option<String>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
|
||||||
|
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScore>, default)]
|
||||||
|
pub show_ranking_score: bool,
|
||||||
|
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
|
||||||
|
pub show_ranking_score_details: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Deserr)]
|
||||||
|
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
|
||||||
|
pub struct ExternalDocumentId(String);
|
||||||
|
|
||||||
|
impl AsRef<str> for ExternalDocumentId {
|
||||||
|
fn as_ref(&self) -> &str {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExternalDocumentId {
|
||||||
|
pub fn into_inner(self) -> String {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<String> for ExternalDocumentId {
|
||||||
|
type Error = InvalidSimilarId;
|
||||||
|
|
||||||
|
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||||
|
serde_json::Value::String(value).try_into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Value> for ExternalDocumentId {
|
||||||
|
type Error = InvalidSimilarId;
|
||||||
|
|
||||||
|
fn try_from(value: Value) -> Result<Self, Self::Error> {
|
||||||
|
Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
|
||||||
#[deserr(rename_all = camelCase)]
|
#[deserr(rename_all = camelCase)]
|
||||||
pub enum MatchingStrategy {
|
pub enum MatchingStrategy {
|
||||||
@ -538,6 +591,16 @@ impl fmt::Debug for SearchResult {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct SimilarResult {
|
||||||
|
pub hits: Vec<SearchHit>,
|
||||||
|
pub id: String,
|
||||||
|
pub processing_time_ms: u128,
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub hits_info: HitsInfo,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct SearchResultWithIndex {
|
pub struct SearchResultWithIndex {
|
||||||
@ -570,8 +633,8 @@ pub struct FacetSearchResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Incorporate search rules in search query
|
/// Incorporate search rules in search query
|
||||||
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
|
pub fn add_search_rules(filter: &mut Option<Value>, rules: IndexSearchRules) {
|
||||||
query.filter = match (query.filter.take(), rules.filter) {
|
*filter = match (filter.take(), rules.filter) {
|
||||||
(None, rules_filter) => rules_filter,
|
(None, rules_filter) => rules_filter,
|
||||||
(filter, None) => filter,
|
(filter, None) => filter,
|
||||||
(Some(filter), Some(rules_filter)) => {
|
(Some(filter), Some(rules_filter)) => {
|
||||||
@ -719,131 +782,52 @@ pub fn perform_search(
|
|||||||
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
let SearchQuery {
|
||||||
|
q,
|
||||||
|
vector: _,
|
||||||
|
hybrid: _,
|
||||||
|
// already computed from prepare_search
|
||||||
|
offset: _,
|
||||||
|
limit,
|
||||||
|
page,
|
||||||
|
hits_per_page,
|
||||||
|
attributes_to_retrieve,
|
||||||
|
attributes_to_crop,
|
||||||
|
crop_length,
|
||||||
|
attributes_to_highlight,
|
||||||
|
show_matches_position,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
filter: _,
|
||||||
|
sort,
|
||||||
|
facets,
|
||||||
|
highlight_pre_tag,
|
||||||
|
highlight_post_tag,
|
||||||
|
crop_marker,
|
||||||
|
matching_strategy: _,
|
||||||
|
attributes_to_search_on: _,
|
||||||
|
} = query;
|
||||||
|
|
||||||
let displayed_ids = index
|
let format = AttributesFormat {
|
||||||
.displayed_fields_ids(&rtxn)?
|
attributes_to_retrieve,
|
||||||
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
attributes_to_highlight,
|
||||||
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
attributes_to_crop,
|
||||||
|
crop_length,
|
||||||
let fids = |attrs: &BTreeSet<String>| {
|
crop_marker,
|
||||||
let mut ids = BTreeSet::new();
|
highlight_pre_tag,
|
||||||
for attr in attrs {
|
highlight_post_tag,
|
||||||
if attr == "*" {
|
show_matches_position,
|
||||||
ids.clone_from(&displayed_ids);
|
sort,
|
||||||
break;
|
show_ranking_score,
|
||||||
}
|
show_ranking_score_details,
|
||||||
|
|
||||||
if let Some(id) = fields_ids_map.id(attr) {
|
|
||||||
ids.insert(id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ids
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// The attributes to retrieve are the ones explicitly marked as to retrieve (all by default),
|
let documents =
|
||||||
// but these attributes must be also be present
|
make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?;
|
||||||
// - in the fields_ids_map
|
|
||||||
// - in the displayed attributes
|
|
||||||
let to_retrieve_ids: BTreeSet<_> = query
|
|
||||||
.attributes_to_retrieve
|
|
||||||
.as_ref()
|
|
||||||
.map(fids)
|
|
||||||
.unwrap_or_else(|| displayed_ids.clone())
|
|
||||||
.intersection(&displayed_ids)
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default();
|
|
||||||
|
|
||||||
let attr_to_crop = query.attributes_to_crop.unwrap_or_default();
|
|
||||||
|
|
||||||
// Attributes in `formatted_options` correspond to the attributes that will be in `_formatted`
|
|
||||||
// These attributes are:
|
|
||||||
// - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`)
|
|
||||||
// - the attributes asked to be retrieved: these attributes will not be highlighted/cropped
|
|
||||||
// But these attributes must be also present in displayed attributes
|
|
||||||
let formatted_options = compute_formatted_options(
|
|
||||||
&attr_to_highlight,
|
|
||||||
&attr_to_crop,
|
|
||||||
query.crop_length,
|
|
||||||
&to_retrieve_ids,
|
|
||||||
&fields_ids_map,
|
|
||||||
&displayed_ids,
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut tokenizer_builder = TokenizerBuilder::default();
|
|
||||||
tokenizer_builder.create_char_map(true);
|
|
||||||
|
|
||||||
let script_lang_map = index.script_language(&rtxn)?;
|
|
||||||
if !script_lang_map.is_empty() {
|
|
||||||
tokenizer_builder.allow_list(&script_lang_map);
|
|
||||||
}
|
|
||||||
|
|
||||||
let separators = index.allowed_separators(&rtxn)?;
|
|
||||||
let separators: Option<Vec<_>> =
|
|
||||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
|
||||||
if let Some(ref separators) = separators {
|
|
||||||
tokenizer_builder.separators(separators);
|
|
||||||
}
|
|
||||||
|
|
||||||
let dictionary = index.dictionary(&rtxn)?;
|
|
||||||
let dictionary: Option<Vec<_>> =
|
|
||||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
|
||||||
if let Some(ref dictionary) = dictionary {
|
|
||||||
tokenizer_builder.words_dict(dictionary);
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
|
||||||
formatter_builder.crop_marker(query.crop_marker);
|
|
||||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
|
||||||
formatter_builder.highlight_suffix(query.highlight_post_tag);
|
|
||||||
|
|
||||||
let mut documents = Vec::new();
|
|
||||||
let documents_iter = index.documents(&rtxn, documents_ids)?;
|
|
||||||
|
|
||||||
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
|
||||||
// First generate a document with all the displayed fields
|
|
||||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
|
||||||
|
|
||||||
// select the attributes to retrieve
|
|
||||||
let attributes_to_retrieve = to_retrieve_ids
|
|
||||||
.iter()
|
|
||||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
|
||||||
let mut document =
|
|
||||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
|
||||||
|
|
||||||
let (matches_position, formatted) = format_fields(
|
|
||||||
&displayed_document,
|
|
||||||
&fields_ids_map,
|
|
||||||
&formatter_builder,
|
|
||||||
&formatted_options,
|
|
||||||
query.show_matches_position,
|
|
||||||
&displayed_ids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
if let Some(sort) = query.sort.as_ref() {
|
|
||||||
insert_geo_distance(sort, &mut document);
|
|
||||||
}
|
|
||||||
|
|
||||||
let ranking_score =
|
|
||||||
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
|
||||||
let ranking_score_details =
|
|
||||||
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
|
|
||||||
|
|
||||||
let hit = SearchHit {
|
|
||||||
document,
|
|
||||||
formatted,
|
|
||||||
matches_position,
|
|
||||||
ranking_score_details,
|
|
||||||
ranking_score,
|
|
||||||
};
|
|
||||||
documents.push(hit);
|
|
||||||
}
|
|
||||||
|
|
||||||
let number_of_hits = min(candidates.len() as usize, max_total_hits);
|
let number_of_hits = min(candidates.len() as usize, max_total_hits);
|
||||||
let hits_info = if is_finite_pagination {
|
let hits_info = if is_finite_pagination {
|
||||||
let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
|
let hits_per_page = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
|
||||||
// If hit_per_page is 0, then pages can't be computed and so we respond 0.
|
// If hit_per_page is 0, then pages can't be computed and so we respond 0.
|
||||||
let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
|
let total_pages = (number_of_hits + hits_per_page.saturating_sub(1))
|
||||||
.checked_div(hits_per_page)
|
.checked_div(hits_per_page)
|
||||||
@ -851,15 +835,15 @@ pub fn perform_search(
|
|||||||
|
|
||||||
HitsInfo::Pagination {
|
HitsInfo::Pagination {
|
||||||
hits_per_page,
|
hits_per_page,
|
||||||
page: query.page.unwrap_or(1),
|
page: page.unwrap_or(1),
|
||||||
total_pages,
|
total_pages,
|
||||||
total_hits: number_of_hits,
|
total_hits: number_of_hits,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
|
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
|
||||||
};
|
};
|
||||||
|
|
||||||
let (facet_distribution, facet_stats) = match query.facets {
|
let (facet_distribution, facet_stats) = match facets {
|
||||||
Some(ref fields) => {
|
Some(ref fields) => {
|
||||||
let mut facet_distribution = index.facets_distribution(&rtxn);
|
let mut facet_distribution = index.facets_distribution(&rtxn);
|
||||||
|
|
||||||
@ -896,7 +880,7 @@ pub fn perform_search(
|
|||||||
let result = SearchResult {
|
let result = SearchResult {
|
||||||
hits: documents,
|
hits: documents,
|
||||||
hits_info,
|
hits_info,
|
||||||
query: query.q.unwrap_or_default(),
|
query: q.unwrap_or_default(),
|
||||||
processing_time_ms: before_search.elapsed().as_millis(),
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
facet_distribution,
|
facet_distribution,
|
||||||
facet_stats,
|
facet_stats,
|
||||||
@ -907,6 +891,130 @@ pub fn perform_search(
|
|||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct AttributesFormat {
|
||||||
|
attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||||
|
attributes_to_highlight: Option<HashSet<String>>,
|
||||||
|
attributes_to_crop: Option<Vec<String>>,
|
||||||
|
crop_length: usize,
|
||||||
|
crop_marker: String,
|
||||||
|
highlight_pre_tag: String,
|
||||||
|
highlight_post_tag: String,
|
||||||
|
show_matches_position: bool,
|
||||||
|
sort: Option<Vec<String>>,
|
||||||
|
show_ranking_score: bool,
|
||||||
|
show_ranking_score_details: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_hits(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &RoTxn<'_>,
|
||||||
|
format: AttributesFormat,
|
||||||
|
matching_words: milli::MatchingWords,
|
||||||
|
documents_ids: Vec<u32>,
|
||||||
|
document_scores: Vec<Vec<ScoreDetails>>,
|
||||||
|
) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
|
||||||
|
let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
|
||||||
|
let displayed_ids = index
|
||||||
|
.displayed_fields_ids(rtxn)?
|
||||||
|
.map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
|
||||||
|
.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
|
||||||
|
let fids = |attrs: &BTreeSet<String>| {
|
||||||
|
let mut ids = BTreeSet::new();
|
||||||
|
for attr in attrs {
|
||||||
|
if attr == "*" {
|
||||||
|
ids.clone_from(&displayed_ids);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(id) = fields_ids_map.id(attr) {
|
||||||
|
ids.insert(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ids
|
||||||
|
};
|
||||||
|
let to_retrieve_ids: BTreeSet<_> = format
|
||||||
|
.attributes_to_retrieve
|
||||||
|
.as_ref()
|
||||||
|
.map(fids)
|
||||||
|
.unwrap_or_else(|| displayed_ids.clone())
|
||||||
|
.intersection(&displayed_ids)
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
|
||||||
|
let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
|
||||||
|
let formatted_options = compute_formatted_options(
|
||||||
|
&attr_to_highlight,
|
||||||
|
&attr_to_crop,
|
||||||
|
format.crop_length,
|
||||||
|
&to_retrieve_ids,
|
||||||
|
&fields_ids_map,
|
||||||
|
&displayed_ids,
|
||||||
|
);
|
||||||
|
let mut tokenizer_builder = TokenizerBuilder::default();
|
||||||
|
tokenizer_builder.create_char_map(true);
|
||||||
|
let script_lang_map = index.script_language(rtxn)?;
|
||||||
|
if !script_lang_map.is_empty() {
|
||||||
|
tokenizer_builder.allow_list(&script_lang_map);
|
||||||
|
}
|
||||||
|
let separators = index.allowed_separators(rtxn)?;
|
||||||
|
let separators: Option<Vec<_>> =
|
||||||
|
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
if let Some(ref separators) = separators {
|
||||||
|
tokenizer_builder.separators(separators);
|
||||||
|
}
|
||||||
|
let dictionary = index.dictionary(rtxn)?;
|
||||||
|
let dictionary: Option<Vec<_>> =
|
||||||
|
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||||
|
if let Some(ref dictionary) = dictionary {
|
||||||
|
tokenizer_builder.words_dict(dictionary);
|
||||||
|
}
|
||||||
|
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
||||||
|
formatter_builder.crop_marker(format.crop_marker);
|
||||||
|
formatter_builder.highlight_prefix(format.highlight_pre_tag);
|
||||||
|
formatter_builder.highlight_suffix(format.highlight_post_tag);
|
||||||
|
let mut documents = Vec::new();
|
||||||
|
let documents_iter = index.documents(rtxn, documents_ids)?;
|
||||||
|
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
|
||||||
|
// First generate a document with all the displayed fields
|
||||||
|
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||||
|
|
||||||
|
// select the attributes to retrieve
|
||||||
|
let attributes_to_retrieve = to_retrieve_ids
|
||||||
|
.iter()
|
||||||
|
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
||||||
|
let mut document =
|
||||||
|
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
||||||
|
|
||||||
|
let (matches_position, formatted) = format_fields(
|
||||||
|
&displayed_document,
|
||||||
|
&fields_ids_map,
|
||||||
|
&formatter_builder,
|
||||||
|
&formatted_options,
|
||||||
|
format.show_matches_position,
|
||||||
|
&displayed_ids,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
if let Some(sort) = format.sort.as_ref() {
|
||||||
|
insert_geo_distance(sort, &mut document);
|
||||||
|
}
|
||||||
|
|
||||||
|
let ranking_score =
|
||||||
|
format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
|
||||||
|
let ranking_score_details =
|
||||||
|
format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
|
||||||
|
|
||||||
|
let hit = SearchHit {
|
||||||
|
document,
|
||||||
|
formatted,
|
||||||
|
matches_position,
|
||||||
|
ranking_score_details,
|
||||||
|
ranking_score,
|
||||||
|
};
|
||||||
|
documents.push(hit);
|
||||||
|
}
|
||||||
|
Ok(documents)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn perform_facet_search(
|
pub fn perform_facet_search(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
search_query: SearchQuery,
|
search_query: SearchQuery,
|
||||||
@ -941,6 +1049,95 @@ pub fn perform_facet_search(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn perform_similar(
|
||||||
|
index: &Index,
|
||||||
|
query: SimilarQuery,
|
||||||
|
embedder_name: String,
|
||||||
|
embedder: Arc<Embedder>,
|
||||||
|
) -> Result<SimilarResult, ResponseError> {
|
||||||
|
let before_search = Instant::now();
|
||||||
|
let rtxn = index.read_txn()?;
|
||||||
|
|
||||||
|
let SimilarQuery {
|
||||||
|
id,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
|
filter: _,
|
||||||
|
embedder: _,
|
||||||
|
attributes_to_retrieve,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
|
||||||
|
// preventing a use-after-move
|
||||||
|
let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
|
||||||
|
return Err(ResponseError::from_msg(
|
||||||
|
MeilisearchHttpError::DocumentNotFound(id.into_inner()).to_string(),
|
||||||
|
Code::NotFoundSimilarId,
|
||||||
|
));
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut similar =
|
||||||
|
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
|
||||||
|
|
||||||
|
if let Some(ref filter) = query.filter {
|
||||||
|
if let Some(facets) = parse_filter(filter)
|
||||||
|
// inject InvalidSimilarFilter code
|
||||||
|
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
|
||||||
|
{
|
||||||
|
similar.filter(facets);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let milli::SearchResult {
|
||||||
|
documents_ids,
|
||||||
|
matching_words: _,
|
||||||
|
candidates,
|
||||||
|
document_scores,
|
||||||
|
degraded: _,
|
||||||
|
used_negative_operator: _,
|
||||||
|
} = similar.execute().map_err(|err| match err {
|
||||||
|
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
|
||||||
|
ResponseError::from_msg(err.to_string(), Code::InvalidSimilarFilter)
|
||||||
|
}
|
||||||
|
err => err.into(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let format = AttributesFormat {
|
||||||
|
attributes_to_retrieve,
|
||||||
|
attributes_to_highlight: None,
|
||||||
|
attributes_to_crop: None,
|
||||||
|
crop_length: DEFAULT_CROP_LENGTH(),
|
||||||
|
crop_marker: DEFAULT_CROP_MARKER(),
|
||||||
|
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
|
||||||
|
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||||
|
show_matches_position: false,
|
||||||
|
sort: None,
|
||||||
|
show_ranking_score,
|
||||||
|
show_ranking_score_details,
|
||||||
|
};
|
||||||
|
|
||||||
|
let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?;
|
||||||
|
|
||||||
|
let max_total_hits = index
|
||||||
|
.pagination_max_total_hits(&rtxn)
|
||||||
|
.map_err(milli::Error::from)?
|
||||||
|
.map(|x| x as usize)
|
||||||
|
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||||
|
|
||||||
|
let number_of_hits = min(candidates.len() as usize, max_total_hits);
|
||||||
|
let hits_info = HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits };
|
||||||
|
|
||||||
|
let result = SimilarResult {
|
||||||
|
hits,
|
||||||
|
hits_info,
|
||||||
|
id: id.into_inner(),
|
||||||
|
processing_time_ms: before_search.elapsed().as_millis(),
|
||||||
|
};
|
||||||
|
Ok(result)
|
||||||
|
}
|
||||||
|
|
||||||
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
static ref GEO_REGEX: Regex =
|
static ref GEO_REGEX: Regex =
|
||||||
|
@ -380,6 +380,43 @@ impl Index<'_> {
|
|||||||
self.service.get(url).await
|
self.service.get(url).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Performs both GET and POST similar queries
|
||||||
|
pub async fn similar(
|
||||||
|
&self,
|
||||||
|
query: Value,
|
||||||
|
test: impl Fn(Value, StatusCode) + UnwindSafe + Clone,
|
||||||
|
) {
|
||||||
|
let post = self.similar_post(query.clone()).await;
|
||||||
|
|
||||||
|
let query = yaup::to_string(&query).unwrap();
|
||||||
|
let get = self.similar_get(&query).await;
|
||||||
|
|
||||||
|
insta::allow_duplicates! {
|
||||||
|
let (response, code) = post;
|
||||||
|
let t = test.clone();
|
||||||
|
if let Err(e) = catch_unwind(move || t(response, code)) {
|
||||||
|
eprintln!("Error with post search");
|
||||||
|
resume_unwind(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
let (response, code) = get;
|
||||||
|
if let Err(e) = catch_unwind(move || test(response, code)) {
|
||||||
|
eprintln!("Error with get search");
|
||||||
|
resume_unwind(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn similar_post(&self, query: Value) -> (Value, StatusCode) {
|
||||||
|
let url = format!("/indexes/{}/similar", urlencode(self.uid.as_ref()));
|
||||||
|
self.service.post_encoded(url, query, self.encoder).await
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
|
||||||
|
let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
|
||||||
|
self.service.get(url).await
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) {
|
pub async fn facet_search(&self, query: Value) -> (Value, StatusCode) {
|
||||||
let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref()));
|
let url = format!("/indexes/{}/facet-search", urlencode(self.uid.as_ref()));
|
||||||
self.service.post_encoded(url, query, self.encoder).await
|
self.service.post_encoded(url, query, self.encoder).await
|
||||||
|
@ -8,6 +8,7 @@ mod index;
|
|||||||
mod logs;
|
mod logs;
|
||||||
mod search;
|
mod search;
|
||||||
mod settings;
|
mod settings;
|
||||||
|
mod similar;
|
||||||
mod snapshot;
|
mod snapshot;
|
||||||
mod stats;
|
mod stats;
|
||||||
mod swap_indexes;
|
mod swap_indexes;
|
||||||
|
696
meilisearch/tests/similar/errors.rs
Normal file
696
meilisearch/tests/similar/errors.rs
Normal file
@ -0,0 +1,696 @@
|
|||||||
|
use meili_snap::*;
|
||||||
|
|
||||||
|
use super::DOCUMENTS;
|
||||||
|
use crate::common::Server;
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_unexisting_index() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Index `test` not found.",
|
||||||
|
"code": "index_not_found",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||||
|
});
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947}), |response, code| {
|
||||||
|
assert_eq!(code, 404);
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_unexisting_parameter() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "marin": "hello"}), |response, code| {
|
||||||
|
assert_eq!(code, 400, "{}", response);
|
||||||
|
assert_eq!(response["code"], "bad_request");
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_feature_not_enabled() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": 287947})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Using the similar API requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
|
||||||
|
"code": "feature_not_enabled",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_bad_id() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||||
|
"code": "invalid_similar_id",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_invalid_id() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).",
|
||||||
|
"code": "invalid_similar_id",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_not_found_id() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Document `definitely-doesnt-exist` not found.",
|
||||||
|
"code": "not_found_similar_id",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#not_found_similar_id"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_bad_offset() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||||
|
"code": "invalid_similar_offset",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value in parameter `offset`: could not parse `doggo` as a positive integer",
|
||||||
|
"code": "invalid_similar_offset",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_offset"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_bad_limit() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||||
|
"code": "invalid_similar_limit",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid value in parameter `limit`: could not parse `doggo` as a positive integer",
|
||||||
|
"code": "invalid_similar_limit",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_limit"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn similar_bad_filter() {
|
||||||
|
// Since a filter is deserialized as a json Value it will never fail to deserialize.
|
||||||
|
// Thus the error message is not generated by deserr but written by us.
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await;
|
||||||
|
snapshot!(code, @"400 Bad Request");
|
||||||
|
snapshot!(json_string!(response), @r###"
|
||||||
|
{
|
||||||
|
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
// Can't make the `filter` fail with a get search since it'll accept anything as a strings.
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_invalid_syntax_object() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_invalid_syntax_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_invalid_syntax_string() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(
|
||||||
|
json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}),
|
||||||
|
|response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_invalid_attribute_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_invalid_attribute_string() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_geo_attribute_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_geo_attribute_string() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_attribute_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_attribute_string() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_geo_point_array() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter_reserved_geo_point_string() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
let expected_response = json!({
|
||||||
|
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
|
||||||
|
"code": "invalid_similar_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
|
||||||
|
});
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| {
|
||||||
|
assert_eq!(response, expected_response);
|
||||||
|
assert_eq!(code, 400);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
373
meilisearch/tests/similar/mod.rs
Normal file
373
meilisearch/tests/similar/mod.rs
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
mod errors;
|
||||||
|
|
||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
|
use crate::common::{Server, Value};
|
||||||
|
use crate::json;
|
||||||
|
|
||||||
|
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||||
|
json!([
|
||||||
|
{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "287947",
|
||||||
|
// Three semantic properties:
|
||||||
|
// 1. magic, anything that reminds you of magic
|
||||||
|
// 2. authority, anything that inspires command
|
||||||
|
// 3. horror, anything that inspires fear or dread
|
||||||
|
"_vectors": { "manual": [0.8, 0.4, -0.5]},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "299537",
|
||||||
|
"_vectors": { "manual": [0.6, 0.8, -0.2] },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Escape Room",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "522681",
|
||||||
|
"_vectors": { "manual": [0.1, 0.6, 0.8] },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "How to Train Your Dragon: The Hidden World",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "166428",
|
||||||
|
"_vectors": { "manual": [0.7, 0.7, -0.4] },
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "All Quiet on the Western Front",
|
||||||
|
"release_year": 1930,
|
||||||
|
"id": "143",
|
||||||
|
"_vectors": { "manual": [-0.5, 0.3, 0.85] },
|
||||||
|
}
|
||||||
|
])
|
||||||
|
});
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn basic() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 143}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "Escape Room",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "522681",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.1,
|
||||||
|
0.6,
|
||||||
|
0.8
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "299537",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.6,
|
||||||
|
0.8,
|
||||||
|
-0.2
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "How to Train Your Dragon: The Hidden World",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "166428",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.7,
|
||||||
|
0.7,
|
||||||
|
-0.4
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "287947",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.8,
|
||||||
|
0.4,
|
||||||
|
-0.5
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": "299537"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "How to Train Your Dragon: The Hidden World",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "166428",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.7,
|
||||||
|
0.7,
|
||||||
|
-0.4
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "287947",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.8,
|
||||||
|
0.4,
|
||||||
|
-0.5
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Escape Room",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "522681",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.1,
|
||||||
|
0.6,
|
||||||
|
0.8
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "All Quiet on the Western Front",
|
||||||
|
"release_year": 1930,
|
||||||
|
"id": "143",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
-0.5,
|
||||||
|
0.3,
|
||||||
|
0.85
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn filter() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title", "release_year"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "299537",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.6,
|
||||||
|
0.8,
|
||||||
|
-0.2
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "How to Train Your Dragon: The Hidden World",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "166428",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.7,
|
||||||
|
0.7,
|
||||||
|
-0.4
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "Shazam!",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "287947",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.8,
|
||||||
|
0.4,
|
||||||
|
-0.5
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "All Quiet on the Western Front",
|
||||||
|
"release_year": 1930,
|
||||||
|
"id": "143",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
-0.5,
|
||||||
|
0.3,
|
||||||
|
0.85
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn limit_and_offset() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = server.index("test");
|
||||||
|
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(value, @r###"
|
||||||
|
{
|
||||||
|
"vectorStore": true,
|
||||||
|
"metrics": false,
|
||||||
|
"logsRoute": false
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"manual": {
|
||||||
|
"source": "userProvided",
|
||||||
|
"dimensions": 3,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"filterableAttributes": ["title"]}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await;
|
||||||
|
|
||||||
|
let documents = DOCUMENTS.clone();
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
index.wait_task(value.uid()).await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 143, "limit": 1}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "Escape Room",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "522681",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.1,
|
||||||
|
0.6,
|
||||||
|
0.8
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
|
||||||
|
index
|
||||||
|
.similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
snapshot!(json_string!(response["hits"]), @r###"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"title": "Captain Marvel",
|
||||||
|
"release_year": 2019,
|
||||||
|
"id": "299537",
|
||||||
|
"_vectors": {
|
||||||
|
"manual": [
|
||||||
|
0.6,
|
||||||
|
0.8,
|
||||||
|
-0.2
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
})
|
||||||
|
.await;
|
||||||
|
}
|
@ -31,6 +31,7 @@ macro_rules! verify_snapshot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
|
#[cfg_attr(target_os = "windows", ignore)]
|
||||||
async fn perform_snapshot() {
|
async fn perform_snapshot() {
|
||||||
let temp = tempfile::tempdir().unwrap();
|
let temp = tempfile::tempdir().unwrap();
|
||||||
let snapshot_dir = tempfile::tempdir().unwrap();
|
let snapshot_dir = tempfile::tempdir().unwrap();
|
||||||
|
@ -49,7 +49,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut ctx = SearchContext::new(&index, &txn)?;
|
let mut ctx = SearchContext::new(&index, &txn)?;
|
||||||
let universe = filtered_universe(&ctx, &None)?;
|
let universe = filtered_universe(ctx.index, ctx.txn, &None)?;
|
||||||
|
|
||||||
let docs = execute_search(
|
let docs = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
|
@ -12,7 +12,10 @@ use bimap::BiHashMap;
|
|||||||
pub use builder::DocumentsBatchBuilder;
|
pub use builder::DocumentsBatchBuilder;
|
||||||
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
|
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
|
||||||
use obkv::KvReader;
|
use obkv::KvReader;
|
||||||
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
|
pub use primary_key::{
|
||||||
|
validate_document_id_value, DocumentIdExtractionError, FieldIdMapper, PrimaryKey,
|
||||||
|
DEFAULT_PRIMARY_KEY,
|
||||||
|
};
|
||||||
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
|
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
@ -60,7 +60,7 @@ impl<'a> PrimaryKey<'a> {
|
|||||||
Some(document_id_bytes) => {
|
Some(document_id_bytes) => {
|
||||||
let document_id = serde_json::from_slice(document_id_bytes)
|
let document_id = serde_json::from_slice(document_id_bytes)
|
||||||
.map_err(InternalError::SerdeJson)?;
|
.map_err(InternalError::SerdeJson)?;
|
||||||
match validate_document_id_value(document_id)? {
|
match validate_document_id_value(document_id) {
|
||||||
Ok(document_id) => Ok(Ok(document_id)),
|
Ok(document_id) => Ok(Ok(document_id)),
|
||||||
Err(user_error) => {
|
Err(user_error) => {
|
||||||
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
||||||
@ -88,7 +88,7 @@ impl<'a> PrimaryKey<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
match matching_documents_ids.pop() {
|
match matching_documents_ids.pop() {
|
||||||
Some(document_id) => match validate_document_id_value(document_id)? {
|
Some(document_id) => match validate_document_id_value(document_id) {
|
||||||
Ok(document_id) => Ok(Ok(document_id)),
|
Ok(document_id) => Ok(Ok(document_id)),
|
||||||
Err(user_error) => {
|
Err(user_error) => {
|
||||||
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
|
||||||
@ -159,14 +159,14 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
|
pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> {
|
||||||
match document_id {
|
match document_id {
|
||||||
Value::String(string) => match validate_document_id(&string) {
|
Value::String(string) => match validate_document_id(&string) {
|
||||||
Some(s) if s.len() == string.len() => Ok(Ok(string)),
|
Some(s) if s.len() == string.len() => Ok(string),
|
||||||
Some(s) => Ok(Ok(s.to_string())),
|
Some(s) => Ok(s.to_string()),
|
||||||
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
|
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
|
||||||
},
|
},
|
||||||
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
|
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
|
||||||
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
|
content => Err(UserError::InvalidDocumentId { document_id: content }),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1595,6 +1595,22 @@ impl Index {
|
|||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn arroy_readers<'a>(
|
||||||
|
&'a self,
|
||||||
|
rtxn: &'a RoTxn<'a>,
|
||||||
|
embedder_id: u8,
|
||||||
|
) -> impl Iterator<Item = Result<arroy::Reader<arroy::distances::Angular>>> + 'a {
|
||||||
|
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
|
||||||
|
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
||||||
|
.map(Some)
|
||||||
|
.or_else(|e| match e {
|
||||||
|
arroy::Error::MissingMetadata => Ok(None),
|
||||||
|
e => Err(e.into()),
|
||||||
|
})
|
||||||
|
.transpose()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
|
pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
|
||||||
self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
|
self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
|
||||||
}
|
}
|
||||||
|
@ -63,6 +63,7 @@ pub use self::heed_codec::{
|
|||||||
};
|
};
|
||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
|
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
|
||||||
|
pub use self::search::similar::Similar;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
|
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
|
||||||
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||||
|
@ -24,6 +24,7 @@ pub mod facet;
|
|||||||
mod fst_utils;
|
mod fst_utils;
|
||||||
pub mod hybrid;
|
pub mod hybrid;
|
||||||
pub mod new;
|
pub mod new;
|
||||||
|
pub mod similar;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SemanticSearch {
|
pub struct SemanticSearch {
|
||||||
@ -148,7 +149,7 @@ impl<'a> Search<'a> {
|
|||||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||||
if has_vector_search {
|
if has_vector_search {
|
||||||
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||||
filtered_universe(&ctx, &self.filter)
|
filtered_universe(ctx.index, ctx.txn, &self.filter)
|
||||||
} else {
|
} else {
|
||||||
Ok(self.execute()?.candidates)
|
Ok(self.execute()?.candidates)
|
||||||
}
|
}
|
||||||
@ -161,7 +162,7 @@ impl<'a> Search<'a> {
|
|||||||
ctx.attributes_to_search_on(searchable_attributes)?;
|
ctx.attributes_to_search_on(searchable_attributes)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let universe = filtered_universe(&ctx, &self.filter)?;
|
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
|
||||||
let PartialSearchResult {
|
let PartialSearchResult {
|
||||||
located_query_terms,
|
located_query_terms,
|
||||||
candidates,
|
candidates,
|
||||||
|
@ -507,7 +507,7 @@ mod tests {
|
|||||||
impl<'a> MatcherBuilder<'a> {
|
impl<'a> MatcherBuilder<'a> {
|
||||||
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||||
let mut ctx = SearchContext::new(index, rtxn).unwrap();
|
let mut ctx = SearchContext::new(index, rtxn).unwrap();
|
||||||
let universe = filtered_universe(&ctx, &None).unwrap();
|
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
|
||||||
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
Some(query),
|
Some(query),
|
||||||
|
@ -543,11 +543,15 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn filtered_universe(ctx: &SearchContext, filters: &Option<Filter>) -> Result<RoaringBitmap> {
|
pub fn filtered_universe(
|
||||||
|
index: &Index,
|
||||||
|
txn: &RoTxn<'_>,
|
||||||
|
filters: &Option<Filter>,
|
||||||
|
) -> Result<RoaringBitmap> {
|
||||||
Ok(if let Some(filters) = filters {
|
Ok(if let Some(filters) = filters {
|
||||||
filters.evaluate(ctx.txn, ctx.index)?
|
filters.evaluate(txn, index)?
|
||||||
} else {
|
} else {
|
||||||
ctx.index.documents_ids(ctx.txn)?
|
index.documents_ids(txn)?
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,19 +49,8 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
vector_candidates: &RoaringBitmap,
|
vector_candidates: &RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let writer_index = (self.embedder_index as u16) << 8;
|
let readers: std::result::Result<Vec<_>, _> =
|
||||||
let readers: std::result::Result<Vec<_>, _> = (0..=u8::MAX)
|
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
|
||||||
.map_while(|k| {
|
|
||||||
arroy::Reader::open(ctx.txn, writer_index | (k as u16), ctx.index.vector_arroy)
|
|
||||||
.map(Some)
|
|
||||||
.or_else(|e| match e {
|
|
||||||
arroy::Error::MissingMetadata => Ok(None),
|
|
||||||
e => Err(e),
|
|
||||||
})
|
|
||||||
.transpose()
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let readers = readers?;
|
let readers = readers?;
|
||||||
|
|
||||||
let target = &self.target;
|
let target = &self.target;
|
||||||
|
111
milli/src/search/similar.rs
Normal file
111
milli/src/search/similar.rs
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use ordered_float::OrderedFloat;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::score_details::{self, ScoreDetails};
|
||||||
|
use crate::vector::Embedder;
|
||||||
|
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
|
||||||
|
|
||||||
|
pub struct Similar<'a> {
|
||||||
|
id: DocumentId,
|
||||||
|
// this should be linked to the String in the query
|
||||||
|
filter: Option<Filter<'a>>,
|
||||||
|
offset: usize,
|
||||||
|
limit: usize,
|
||||||
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
|
index: &'a Index,
|
||||||
|
embedder_name: String,
|
||||||
|
embedder: Arc<Embedder>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Similar<'a> {
|
||||||
|
pub fn new(
|
||||||
|
id: DocumentId,
|
||||||
|
offset: usize,
|
||||||
|
limit: usize,
|
||||||
|
index: &'a Index,
|
||||||
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
|
embedder_name: String,
|
||||||
|
embedder: Arc<Embedder>,
|
||||||
|
) -> Self {
|
||||||
|
Self { id, filter: None, offset, limit, rtxn, index, embedder_name, embedder }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn filter(&mut self, filter: Filter<'a>) -> &mut Self {
|
||||||
|
self.filter = Some(filter);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
|
let universe = filtered_universe(self.index, self.rtxn, &self.filter)?;
|
||||||
|
|
||||||
|
let embedder_index =
|
||||||
|
self.index
|
||||||
|
.embedder_category_id
|
||||||
|
.get(self.rtxn, &self.embedder_name)?
|
||||||
|
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
|
||||||
|
|
||||||
|
let readers: std::result::Result<Vec<_>, _> =
|
||||||
|
self.index.arroy_readers(self.rtxn, embedder_index).collect();
|
||||||
|
|
||||||
|
let readers = readers?;
|
||||||
|
|
||||||
|
let mut results = Vec::new();
|
||||||
|
|
||||||
|
for reader in readers.iter() {
|
||||||
|
let nns_by_item = reader.nns_by_item(
|
||||||
|
self.rtxn,
|
||||||
|
self.id,
|
||||||
|
self.limit + self.offset + 1,
|
||||||
|
None,
|
||||||
|
Some(&universe),
|
||||||
|
)?;
|
||||||
|
if let Some(mut nns_by_item) = nns_by_item {
|
||||||
|
results.append(&mut nns_by_item);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||||
|
|
||||||
|
let mut documents_ids = Vec::with_capacity(self.limit);
|
||||||
|
let mut document_scores = Vec::with_capacity(self.limit);
|
||||||
|
// list of documents we've already seen, so that we don't return the same document multiple times.
|
||||||
|
// initialized to the target document, that we never want to return.
|
||||||
|
let mut documents_seen = RoaringBitmap::new();
|
||||||
|
documents_seen.insert(self.id);
|
||||||
|
|
||||||
|
for (docid, distance) in results
|
||||||
|
.into_iter()
|
||||||
|
// skip documents we've already seen & mark that we saw the current document
|
||||||
|
.filter(|(docid, _)| documents_seen.insert(*docid))
|
||||||
|
.skip(self.offset)
|
||||||
|
// take **after** filter and skip so that we get exactly limit elements if available
|
||||||
|
.take(self.limit)
|
||||||
|
{
|
||||||
|
documents_ids.push(docid);
|
||||||
|
|
||||||
|
let score = 1.0 - distance;
|
||||||
|
let score = self
|
||||||
|
.embedder
|
||||||
|
.distribution()
|
||||||
|
.map(|distribution| distribution.shift(score))
|
||||||
|
.unwrap_or(score);
|
||||||
|
|
||||||
|
let score = ScoreDetails::Vector(score_details::Vector { similarity: Some(score) });
|
||||||
|
|
||||||
|
document_scores.push(vec![score]);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(SearchResult {
|
||||||
|
matching_words: Default::default(),
|
||||||
|
candidates: universe,
|
||||||
|
documents_ids,
|
||||||
|
document_scores,
|
||||||
|
degraded: false,
|
||||||
|
used_negative_operator: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -561,10 +561,8 @@ where
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let writer_index = (embedder_index as u16) << 8;
|
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
||||||
for k in 0..=u8::MAX {
|
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
||||||
let writer =
|
|
||||||
arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension);
|
|
||||||
if writer.is_empty(wtxn)? {
|
if writer.is_empty(wtxn)? {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -634,16 +634,9 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
|
||||||
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
|
||||||
)?;
|
)?;
|
||||||
let writer_index = (embedder_index as u16) << 8;
|
|
||||||
// FIXME: allow customizing distance
|
// FIXME: allow customizing distance
|
||||||
let writers: Vec<_> = (0..=u8::MAX)
|
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
|
||||||
.map(|k| {
|
.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
|
||||||
arroy::Writer::new(
|
|
||||||
index.vector_arroy,
|
|
||||||
writer_index | (k as u16),
|
|
||||||
expected_dimension,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// remove vectors for docids we want them removed
|
// remove vectors for docids we want them removed
|
||||||
|
@ -442,3 +442,9 @@ impl DistributionShift {
|
|||||||
pub const fn is_cuda_enabled() -> bool {
|
pub const fn is_cuda_enabled() -> bool {
|
||||||
cfg!(feature = "cuda")
|
cfg!(feature = "cuda")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn arroy_db_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
|
||||||
|
let embedder_id = (embedder_id as u16) << 8;
|
||||||
|
|
||||||
|
(0..=u8::MAX).map(move |k| embedder_id | (k as u16))
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user