mirror of https://github.com/meilisearch/meilisearch.git
Change `retrieveVectors` behavior:

- when the feature is disabled, documents are never modified
- when the feature is enabled and `retrieveVectors` is disabled, `_vectors` is removed from documents
- when the feature is enabled and `retrieveVectors` is enabled, vectors from the vectors DB are merged with `_vectors` in documents

Additionally, `_vectors` is never displayed when the `displayedAttributes` list contains neither `*` nor `_vectors`.

Also fixed an issue where `_vectors` was not injected when all vectors in the dataset were always generated.
Commit b9b938c902 (parent e35ef31738)
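The three cases above are implemented as a new `RetrieveVectors` enum (`Ignore` / `Hide` / `Retrieve`), introduced later in this diff and constructed once per request. A minimal sketch of its decision table, with a hypothetical `vector_store_enabled` boolean standing in for the real `features.check_vector(...)` call; note the asymmetry: requesting vectors while the feature is disabled is an error, while not requesting them is silently fine:

// Hedged sketch: mirrors the decision table of `RetrieveVectors::new` from
// this diff; `vector_store_enabled` is a hypothetical stand-in for the
// `index_scheduler::RoFeatures::check_vector(...)` feature check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RetrieveVectors {
    Ignore,   // feature disabled: documents are never modified
    Hide,     // feature enabled, `retrieveVectors` disabled: strip `_vectors`
    Retrieve, // feature enabled, `retrieveVectors` enabled: merge DB vectors into `_vectors`
}

fn retrieve_vectors(
    requested: bool,
    vector_store_enabled: bool,
) -> Result<RetrieveVectors, &'static str> {
    match (requested, vector_store_enabled) {
        (true, true) => Ok(RetrieveVectors::Retrieve),
        // asking for vectors without the feature is a client error...
        (true, false) => Err("Passing `retrieveVectors` as a parameter requires the vector store feature"),
        (false, true) => Ok(RetrieveVectors::Hide),
        // ...not asking for them never fails
        (false, false) => Ok(RetrieveVectors::Ignore),
    }
}

fn main() {
    assert_eq!(retrieve_vectors(false, false), Ok(RetrieveVectors::Ignore));
    assert_eq!(retrieve_vectors(false, true), Ok(RetrieveVectors::Hide));
    assert_eq!(retrieve_vectors(true, true), Ok(RetrieveVectors::Retrieve));
    assert!(retrieve_vectors(true, false).is_err());
}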
@@ -40,7 +40,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::{
     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
 };
-use crate::search::parse_filter;
+use crate::search::{parse_filter, RetrieveVectors};
 use crate::Opt;

 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -110,21 +110,20 @@ pub async fn get_document(
     debug!(parameters = ?params, "Get document");
     let index_uid = IndexUid::try_from(index_uid)?;

-    let GetDocument { fields, retrieve_vectors } = params.into_inner();
+    let GetDocument { fields, retrieve_vectors: param_retrieve_vectors } = params.into_inner();
     let attributes_to_retrieve = fields.merge_star_and_none();

     let features = index_scheduler.features();
-    if retrieve_vectors.0 {
-        features.check_vector("Passing `retrieveVectors` as a parameter")?;
-    }
+    let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?;
+
     analytics.get_fetch_documents(
-        &DocumentFetchKind::PerDocumentId { retrieve_vectors: retrieve_vectors.0 },
+        &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 },
         &req,
     );

     let index = index_scheduler.index(&index_uid)?;
     let document =
-        retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors.0)?;
+        retrieve_document(&index, &document_id, attributes_to_retrieve, retrieve_vectors)?;
     debug!(returns = ?document, "Get document");
     Ok(HttpResponse::Ok().json(document))
 }
@@ -195,11 +194,6 @@ pub async fn documents_by_query_post(
     let body = body.into_inner();
     debug!(parameters = ?body, "Get documents POST");

-    let features = index_scheduler.features();
-    if body.retrieve_vectors {
-        features.check_vector("Passing `retrieveVectors` as a parameter")?;
-    }
-
     analytics.post_fetch_documents(
         &DocumentFetchKind::Normal {
             with_filter: body.filter.is_some(),
@@ -224,11 +218,6 @@ pub async fn get_documents(

     let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();

-    let features = index_scheduler.features();
-    if retrieve_vectors.0 {
-        features.check_vector("Passing `retrieveVectors` as a parameter")?;
-    }
-
     let filter = match filter {
         Some(f) => match serde_json::from_str(&f) {
             Ok(v) => Some(v),
@@ -266,6 +255,9 @@ fn documents_by_query(
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
     let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;

+    let features = index_scheduler.features();
+    let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
+
     let index = index_scheduler.index(&index_uid)?;
     let (total, documents) =
         retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
@@ -608,7 +600,7 @@ fn some_documents<'a, 't: 'a>(
     index: &'a Index,
     rtxn: &'t RoTxn,
     doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
-    retrieve_vectors: bool,
+    retrieve_vectors: RetrieveVectors,
 ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
@@ -617,24 +609,32 @@ fn some_documents<'a, 't: 'a>(
     Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
         ret.map_err(ResponseError::from).and_then(|(key, document)| -> Result<_, ResponseError> {
             let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, document)?;
-            if retrieve_vectors {
-                let mut vectors = serde_json::Map::new();
-                for (name, vector) in index.embeddings(rtxn, key)? {
-                    let user_provided = embedding_configs
-                        .iter()
-                        .find(|conf| conf.name == name)
-                        .is_some_and(|conf| conf.user_provided.contains(key));
-                    let embeddings = ExplicitVectors {
-                        embeddings: Some(vector.into()),
-                        regenerate: !user_provided,
-                    };
-                    vectors.insert(
-                        name,
-                        serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
-                    );
-                }
-                document.insert("_vectors".into(), vectors.into());
-            }
+            match retrieve_vectors {
+                RetrieveVectors::Ignore => {}
+                RetrieveVectors::Hide => {
+                    document.remove("_vectors");
+                }
+                RetrieveVectors::Retrieve => {
+                    let mut vectors = match document.remove("_vectors") {
+                        Some(Value::Object(map)) => map,
+                        _ => Default::default(),
+                    };
+                    for (name, vector) in index.embeddings(rtxn, key)? {
+                        let user_provided = embedding_configs
+                            .iter()
+                            .find(|conf| conf.name == name)
+                            .is_some_and(|conf| conf.user_provided.contains(key));
+                        let embeddings = ExplicitVectors {
+                            embeddings: Some(vector.into()),
+                            regenerate: !user_provided,
+                        };
+                        vectors.insert(
+                            name,
+                            serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
+                        );
+                    }
+                    document.insert("_vectors".into(), vectors.into());
+                }
+            }

             Ok(document)
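The `RetrieveVectors::Retrieve` arm above no longer starts from an empty map: it first lifts whatever `_vectors` object is already stored in the document, then overlays one entry per embedder read from the vector DB, each marked `regenerate` unless it was user-provided. A self-contained sketch of that merge, assuming `serde_json`; `db_embeddings` is a hypothetical stand-in for `index.embeddings(rtxn, key)?` plus the `user_provided` lookup:

use serde_json::{json, Map, Value};

// Hedged sketch of the merge performed in the `Retrieve` arm above.
fn merge_vectors(
    mut document: Map<String, Value>,
    db_embeddings: Vec<(String, Value, bool)>, // (embedder name, embeddings, user_provided)
) -> Map<String, Value> {
    // start from the `_vectors` object already stored in the document, if any
    let mut vectors = match document.remove("_vectors") {
        Some(Value::Object(map)) => map,
        _ => Map::default(),
    };
    for (name, embeddings, user_provided) in db_embeddings {
        // entries coming from the vector DB overwrite same-named stored entries
        vectors.insert(
            name,
            json!({ "embeddings": embeddings, "regenerate": !user_provided }),
        );
    }
    document.insert("_vectors".into(), Value::Object(vectors));
    document
}

Because the DB entries are inserted last, a same-named entry stored in the document is replaced by the authoritative DB value, while unrelated stored entries survive the merge.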
@@ -648,7 +648,7 @@ fn retrieve_documents<S: AsRef<str>>(
     limit: usize,
     filter: Option<Value>,
     attributes_to_retrieve: Option<Vec<S>>,
-    retrieve_vectors: bool,
+    retrieve_vectors: RetrieveVectors,
 ) -> Result<(u64, Vec<Document>), ResponseError> {
     let rtxn = index.read_txn()?;
     let filter = &filter;
@@ -688,10 +688,9 @@ fn retrieve_documents<S: AsRef<str>>(
         Ok(match &attributes_to_retrieve {
             Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
                 &document?,
-                attributes_to_retrieve
-                    .iter()
-                    .map(|s| s.as_ref())
-                    .chain(retrieve_vectors.then_some("_vectors")),
+                attributes_to_retrieve.iter().map(|s| s.as_ref()).chain(
+                    (retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors"),
+                ),
             ),
             None => document?,
         })
@@ -705,7 +704,7 @@ fn retrieve_document<S: AsRef<str>>(
     index: &Index,
     doc_id: &str,
     attributes_to_retrieve: Option<Vec<S>>,
-    retrieve_vectors: bool,
+    retrieve_vectors: RetrieveVectors,
 ) -> Result<Document, ResponseError> {
     let txn = index.read_txn()?;

@@ -724,7 +723,7 @@ fn retrieve_document<S: AsRef<str>>(
             attributes_to_retrieve
                 .iter()
                 .map(|s| s.as_ref())
-                .chain(retrieve_vectors.then_some("_vectors")),
+                .chain((retrieve_vectors == RetrieveVectors::Retrieve).then_some("_vectors")),
         ),
         None => document,
     };
@@ -20,9 +20,9 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::search::{
     add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
-    SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
-    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
-    DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH,
+    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
+    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;

@@ -225,10 +225,12 @@ pub async fn search_with_url_query(
     let features = index_scheduler.features();

     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
+    let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
     let _permit = search_queue.try_get_search_permit().await?;
-    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_search(&index, query, search_kind, retrieve_vector)
+    })
+    .await?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
     }
@@ -265,10 +267,13 @@ pub async fn search_with_post(
     let features = index_scheduler.features();

     let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?;
+    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
+
     let _permit = search_queue.try_get_search_permit().await?;
-    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind)).await?;
+    let search_result = tokio::task::spawn_blocking(move || {
+        perform_search(&index, query, search_kind, retrieve_vectors)
+    })
+    .await?;
     if let Ok(ref search_result) = search_result {
         aggregate.succeed(search_result);
         if search_result.degraded {
@@ -295,9 +300,6 @@ pub fn search_kind(
     if query.hybrid.is_some() {
         features.check_vector("Passing `hybrid` as a parameter")?;
     }
-    if query.retrieve_vectors {
-        features.check_vector("Passing `retrieveVectors` as a parameter")?;
-    }

     // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing
     if query.vector.is_none() {
@@ -17,8 +17,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
-    SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind,
+    SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -93,6 +93,8 @@ async fn similar(

     features.check_vector("Using the similar API")?;

+    let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
+
     // Tenant token search_rules.
     if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
         add_search_rules(&mut query.filter, search_rules);
@@ -103,8 +105,10 @@ async fn similar(
     let (embedder_name, embedder) =
         SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;

-    tokio::task::spawn_blocking(move || perform_similar(&index, query, embedder_name, embedder))
-        .await?
+    tokio::task::spawn_blocking(move || {
+        perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
+    })
+    .await?
 }

 #[derive(Debug, deserr::Deserr)]
@@ -15,7 +15,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{
-    add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
+    add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex,
 };
 use crate::search_queue::SearchQueue;

@@ -83,11 +83,14 @@ pub async fn multi_search_with_post(

             let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)
                 .with_index(query_index)?;
+            let retrieve_vector =
+                RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?;

-            let search_result =
-                tokio::task::spawn_blocking(move || perform_search(&index, query, search_kind))
-                    .await
-                    .with_index(query_index)?;
+            let search_result = tokio::task::spawn_blocking(move || {
+                perform_search(&index, query, search_kind, retrieve_vector)
+            })
+            .await
+            .with_index(query_index)?;

             search_results.push(SearchResultWithIndex {
                 index_uid: index_uid.into_inner(),
@@ -823,6 +823,7 @@ pub fn perform_search(
     index: &Index,
     query: SearchQuery,
     search_kind: SearchKind,
+    retrieve_vectors: RetrieveVectors,
 ) -> Result<SearchResult, MeilisearchHttpError> {
     let before_search = Instant::now();
     let rtxn = index.read_txn()?;
@@ -860,7 +861,8 @@ pub fn perform_search(
         page,
         hits_per_page,
         attributes_to_retrieve,
-        retrieve_vectors,
+        // use the enum passed as parameter
+        retrieve_vectors: _,
         attributes_to_crop,
         crop_length,
         attributes_to_highlight,
@@ -968,7 +970,7 @@

 struct AttributesFormat {
     attributes_to_retrieve: Option<BTreeSet<String>>,
-    retrieve_vectors: bool,
+    retrieve_vectors: RetrieveVectors,
     attributes_to_highlight: Option<HashSet<String>>,
     attributes_to_crop: Option<Vec<String>>,
     crop_length: usize,
@@ -981,6 +983,36 @@ struct AttributesFormat {
     show_ranking_score_details: bool,
 }

+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum RetrieveVectors {
+    /// Do not touch the `_vectors` field
+    ///
+    /// this is the behavior when the vectorStore feature is disabled
+    Ignore,
+    /// Remove the `_vectors` field
+    ///
+    /// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `false`
+    Hide,
+    /// Retrieve vectors from the DB and merge them into the `_vectors` field
+    ///
+    /// this is the behavior when the vectorStore feature is enabled, and `retrieveVectors` is `true`
+    Retrieve,
+}
+
+impl RetrieveVectors {
+    pub fn new(
+        retrieve_vector: bool,
+        features: index_scheduler::RoFeatures,
+    ) -> Result<Self, index_scheduler::Error> {
+        match (retrieve_vector, features.check_vector("Passing `retrieveVectors` as a parameter")) {
+            (true, Ok(())) => Ok(Self::Retrieve),
+            (true, Err(error)) => Err(error),
+            (false, Ok(())) => Ok(Self::Hide),
+            (false, Err(_)) => Ok(Self::Ignore),
+        }
+    }
+}
+
 fn make_hits(
     index: &Index,
     rtxn: &RoTxn<'_>,
@@ -990,10 +1022,32 @@ fn make_hits(
     document_scores: Vec<Vec<ScoreDetails>>,
 ) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
     let fields_ids_map = index.fields_ids_map(rtxn).unwrap();
-    let displayed_ids = index
-        .displayed_fields_ids(rtxn)?
-        .map(|fields| fields.into_iter().collect::<BTreeSet<_>>())
-        .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
+    let displayed_ids =
+        index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>());
+
+    let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME);
+
+    let vectors_is_hidden = match (&displayed_ids, vectors_fid) {
+        // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
+        (None, _) => false,
+        // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
+        (Some(_), None) => true,
+        // displayed_ids is a finite list, so hide if `_vectors` is not part of it
+        (Some(map), Some(vectors_fid)) => !map.contains(&vectors_fid),
+    };
+
+    let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors {
+        if vectors_is_hidden {
+            RetrieveVectors::Hide
+        } else {
+            RetrieveVectors::Retrieve
+        }
+    } else {
+        format.retrieve_vectors
+    };
+
+    let displayed_ids =
+        displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect());
     let fids = |attrs: &BTreeSet<String>| {
         let mut ids = BTreeSet::new();
         for attr in attrs {
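This hunk enforces the `displayedAttributes` rule from the commit message: before building hits, a requested `Retrieve` is demoted to `Hide` whenever `_vectors` cannot be displayed. A simplified model of the visibility decision, using field names where the real code uses internal field ids; the real code's `(Some(_), None)` arm (a finite list while no `_vectors` field id exists) collapses into the same "not in the list" outcome here:

use std::collections::BTreeSet;

// Hedged sketch: `displayed` is `None` when `displayedAttributes` is the
// `*` wildcard, `Some(set)` when it is a finite list of attribute names.
fn vectors_is_hidden(displayed: Option<&BTreeSet<String>>) -> bool {
    match displayed {
        // wildcard: `_vectors` may always be displayed
        None => false,
        // finite list: hidden unless it names `_vectors` explicitly
        Some(set) => !set.contains("_vectors"),
    }
}

fn main() {
    let with_vectors = BTreeSet::from(["title".to_string(), "_vectors".to_string()]);
    let without_vectors = BTreeSet::from(["title".to_string()]);
    assert!(!vectors_is_hidden(None)); // `displayedAttributes: ["*"]`
    assert!(!vectors_is_hidden(Some(&with_vectors)));
    assert!(vectors_is_hidden(Some(&without_vectors)));
}

So `_vectors` shows up in hits only when `displayedAttributes` is the `*` wildcard or names `_vectors` explicitly, regardless of what `retrieveVectors` asked for.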
@@ -1016,9 +1070,7 @@ fn make_hits(
         .intersection(&displayed_ids)
         .cloned()
         .collect();
-    let is_vectors_displayed =
-        fields_ids_map.id("_vectors").is_some_and(|fid| displayed_ids.contains(&fid));
-    let retrieve_vectors = format.retrieve_vectors && is_vectors_displayed;
     let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default();
     let attr_to_crop = format.attributes_to_crop.unwrap_or_default();
     let formatted_options = compute_formatted_options(
@@ -1058,15 +1110,30 @@ fn make_hits(
        // First generate a document with all the displayed fields
        let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;

+       let add_vectors_fid =
+           vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve);
+
        // select the attributes to retrieve
        let attributes_to_retrieve = to_retrieve_ids
            .iter()
+           // skip the vectors_fid if RetrieveVectors::Hide
+           .filter(|fid| match vectors_fid {
+               Some(vectors_fid) => {
+                   !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid)
+               }
+               None => true,
+           })
+           // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
+           .chain(add_vectors_fid.iter())
            .map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
        let mut document =
            permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);

-       if retrieve_vectors {
-           let mut vectors = serde_json::Map::new();
+       if retrieve_vectors == RetrieveVectors::Retrieve {
+           let mut vectors = match document.remove("_vectors") {
+               Some(Value::Object(map)) => map,
+               _ => Default::default(),
+           };
            for (name, vector) in index.embeddings(rtxn, id)? {
                let user_provided = embedding_configs
                    .iter()
@@ -1148,6 +1215,7 @@ pub fn perform_similar(
     query: SimilarQuery,
     embedder_name: String,
     embedder: Arc<Embedder>,
+    retrieve_vectors: RetrieveVectors,
 ) -> Result<SimilarResult, ResponseError> {
     let before_search = Instant::now();
     let rtxn = index.read_txn()?;
@@ -1159,7 +1227,7 @@ pub fn perform_similar(
         filter: _,
         embedder: _,
         attributes_to_retrieve,
-        retrieve_vectors,
+        retrieve_vectors: _,
         show_ranking_score,
         show_ranking_score_details,
         ranking_score_threshold,
|
Loading…
Reference in New Issue
Block a user