mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Expose a new vector field on the search route
This commit is contained in:
parent
cad90e8cbc
commit
642b0f3a1b
@ -295,6 +295,10 @@ pub fn perform_search(
|
|||||||
|
|
||||||
let mut search = index.search(&rtxn);
|
let mut search = index.search(&rtxn);
|
||||||
|
|
||||||
|
if let Some(ref vector) = query.vector {
|
||||||
|
search.vector(vector.clone());
|
||||||
|
}
|
||||||
|
|
||||||
if let Some(ref query) = query.q {
|
if let Some(ref query) = query.q {
|
||||||
search.query(query);
|
search.query(query);
|
||||||
}
|
}
|
||||||
|
@ -52,6 +52,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let docs = execute_search(
|
let docs = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
|
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
|
||||||
|
&None,
|
||||||
TermsMatchingStrategy::Last,
|
TermsMatchingStrategy::Last,
|
||||||
milli::score_details::ScoringStrategy::Skip,
|
milli::score_details::ScoringStrategy::Skip,
|
||||||
false,
|
false,
|
||||||
|
@ -7,9 +7,13 @@ pub struct DotProduct;
|
|||||||
impl Metric<Vec<f32>> for DotProduct {
|
impl Metric<Vec<f32>> for DotProduct {
|
||||||
type Unit = u32;
|
type Unit = u32;
|
||||||
|
|
||||||
|
// TODO explain me this function, I don't understand why f32.to_bits is ordered.
|
||||||
|
// I tried to do this and it wasn't OK <https://stackoverflow.com/a/43305015/1941280>
|
||||||
|
//
|
||||||
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
|
||||||
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
|
||||||
let dist: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
|
let dist: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
|
||||||
|
let dist = 1.0 - dist;
|
||||||
debug_assert!(!dist.is_nan());
|
debug_assert!(!dist.is_nan());
|
||||||
dist.to_bits()
|
dist.to_bits()
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,7 @@ pub mod new;
|
|||||||
|
|
||||||
pub struct Search<'a> {
|
pub struct Search<'a> {
|
||||||
query: Option<String>,
|
query: Option<String>,
|
||||||
|
vector: Option<Vec<f32>>,
|
||||||
// this should be linked to the String in the query
|
// this should be linked to the String in the query
|
||||||
filter: Option<Filter<'a>>,
|
filter: Option<Filter<'a>>,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
@ -41,6 +42,7 @@ impl<'a> Search<'a> {
|
|||||||
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
|
||||||
Search {
|
Search {
|
||||||
query: None,
|
query: None,
|
||||||
|
vector: None,
|
||||||
filter: None,
|
filter: None,
|
||||||
offset: 0,
|
offset: 0,
|
||||||
limit: 20,
|
limit: 20,
|
||||||
@ -60,6 +62,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn vector(&mut self, vector: impl Into<Vec<f32>>) -> &mut Search<'a> {
|
||||||
|
self.vector = Some(vector.into());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
|
pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
|
||||||
self.offset = offset;
|
self.offset = offset;
|
||||||
self
|
self
|
||||||
@ -114,6 +121,7 @@ impl<'a> Search<'a> {
|
|||||||
execute_search(
|
execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
&self.query,
|
&self.query,
|
||||||
|
&self.vector,
|
||||||
self.terms_matching_strategy,
|
self.terms_matching_strategy,
|
||||||
self.scoring_strategy,
|
self.scoring_strategy,
|
||||||
self.exhaustive_number_hits,
|
self.exhaustive_number_hits,
|
||||||
@ -141,6 +149,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let Search {
|
let Search {
|
||||||
query,
|
query,
|
||||||
|
vector: _,
|
||||||
filter,
|
filter,
|
||||||
offset,
|
offset,
|
||||||
limit,
|
limit,
|
||||||
@ -155,6 +164,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
} = self;
|
} = self;
|
||||||
f.debug_struct("Search")
|
f.debug_struct("Search")
|
||||||
.field("query", query)
|
.field("query", query)
|
||||||
|
.field("vector", &"[...]")
|
||||||
.field("filter", filter)
|
.field("filter", filter)
|
||||||
.field("offset", offset)
|
.field("offset", offset)
|
||||||
.field("limit", limit)
|
.field("limit", limit)
|
||||||
|
@ -509,6 +509,7 @@ mod tests {
|
|||||||
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
&Some(query.to_string()),
|
&Some(query.to_string()),
|
||||||
|
&None,
|
||||||
crate::TermsMatchingStrategy::default(),
|
crate::TermsMatchingStrategy::default(),
|
||||||
crate::score_details::ScoringStrategy::Skip,
|
crate::score_details::ScoringStrategy::Skip,
|
||||||
false,
|
false,
|
||||||
|
@ -28,6 +28,7 @@ use db_cache::DatabaseCache;
|
|||||||
use exact_attribute::ExactAttribute;
|
use exact_attribute::ExactAttribute;
|
||||||
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use hnsw::Searcher;
|
||||||
use interner::{DedupInterner, Interner};
|
use interner::{DedupInterner, Interner};
|
||||||
pub use logger::visual::VisualSearchLogger;
|
pub use logger::visual::VisualSearchLogger;
|
||||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||||
@ -39,6 +40,7 @@ use ranking_rules::{
|
|||||||
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
|
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use sort::Sort;
|
use sort::Sort;
|
||||||
|
use space::Neighbor;
|
||||||
|
|
||||||
use self::geo_sort::GeoSort;
|
use self::geo_sort::GeoSort;
|
||||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||||
@ -46,7 +48,9 @@ use self::graph_based_ranking_rule::Words;
|
|||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||||
use crate::search::new::distinct::apply_distinct_rule;
|
use crate::search::new::distinct::apply_distinct_rule;
|
||||||
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
|
use crate::{
|
||||||
|
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
|
||||||
|
};
|
||||||
|
|
||||||
/// A structure used throughout the execution of a search query.
|
/// A structure used throughout the execution of a search query.
|
||||||
pub struct SearchContext<'ctx> {
|
pub struct SearchContext<'ctx> {
|
||||||
@ -350,6 +354,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
pub fn execute_search(
|
pub fn execute_search(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
query: &Option<String>,
|
query: &Option<String>,
|
||||||
|
vector: &Option<Vec<f32>>,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
@ -442,6 +447,34 @@ pub fn execute_search(
|
|||||||
|
|
||||||
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
|
||||||
|
|
||||||
|
let docids = match vector {
|
||||||
|
Some(vector) => {
|
||||||
|
// return the nearest documents that are also part of the candidates.
|
||||||
|
let mut searcher = Searcher::new();
|
||||||
|
let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
|
||||||
|
let ef = hnsw.len().min(100);
|
||||||
|
let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
|
||||||
|
let neighbors = hnsw.nearest(&vector, ef, &mut searcher, &mut dest[..]);
|
||||||
|
|
||||||
|
let mut docids = Vec::new();
|
||||||
|
for Neighbor { index, distance } in neighbors.iter() {
|
||||||
|
let index = BEU32::new(*index as u32);
|
||||||
|
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
|
||||||
|
dbg!(distance, f32::from_bits(*distance));
|
||||||
|
if universe.contains(docid) {
|
||||||
|
docids.push(docid);
|
||||||
|
if docids.len() == length {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
docids
|
||||||
|
}
|
||||||
|
// return the search docids if the vector field is not specified
|
||||||
|
None => docids,
|
||||||
|
};
|
||||||
|
|
||||||
// The candidates is the universe unless the exhaustive number of hits
|
// The candidates is the universe unless the exhaustive number of hits
|
||||||
// is requested and a distinct attribute is set.
|
// is requested and a distinct attribute is set.
|
||||||
if exhaustive_number_hits {
|
if exhaustive_number_hits {
|
||||||
|
Loading…
Reference in New Issue
Block a user