Expose a new vector field on the search route

This commit is contained in:
Clément Renault 2023-06-08 18:47:06 +02:00
parent cad90e8cbc
commit 642b0f3a1b
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
6 changed files with 54 additions and 1 deletions

View File

@ -295,6 +295,10 @@ pub fn perform_search(
let mut search = index.search(&rtxn);
if let Some(ref vector) = query.vector {
search.vector(vector.clone());
}
if let Some(ref query) = query.q {
search.query(query);
}

View File

@ -52,6 +52,7 @@ fn main() -> Result<(), Box<dyn Error>> {
let docs = execute_search(
&mut ctx,
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
&None,
TermsMatchingStrategy::Last,
milli::score_details::ScoringStrategy::Skip,
false,

View File

@ -7,9 +7,13 @@ pub struct DotProduct;
impl Metric<Vec<f32>> for DotProduct {
type Unit = u32;
// TODO explain me this function, I don't understand why f32.to_bits is ordered.
// I tried to do this and it wasn't OK <https://stackoverflow.com/a/43305015/1941280>
//
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
let dist: f32 = a.iter().zip(b).map(|(a, b)| a * b).sum();
let dist = 1.0 - dist;
debug_assert!(!dist.is_nan());
dist.to_bits()
}

View File

@ -23,6 +23,7 @@ pub mod new;
pub struct Search<'a> {
query: Option<String>,
vector: Option<Vec<f32>>,
// this should be linked to the String in the query
filter: Option<Filter<'a>>,
offset: usize,
@ -41,6 +42,7 @@ impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
Search {
query: None,
vector: None,
filter: None,
offset: 0,
limit: 20,
@ -60,6 +62,11 @@ impl<'a> Search<'a> {
self
}
pub fn vector(&mut self, vector: impl Into<Vec<f32>>) -> &mut Search<'a> {
self.vector = Some(vector.into());
self
}
pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
self.offset = offset;
self
@ -114,6 +121,7 @@ impl<'a> Search<'a> {
execute_search(
&mut ctx,
&self.query,
&self.vector,
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
@ -141,6 +149,7 @@ impl fmt::Debug for Search<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let Search {
query,
vector: _,
filter,
offset,
limit,
@ -155,6 +164,7 @@ impl fmt::Debug for Search<'_> {
} = self;
f.debug_struct("Search")
.field("query", query)
.field("vector", &"[...]")
.field("filter", filter)
.field("offset", offset)
.field("limit", limit)

View File

@ -509,6 +509,7 @@ mod tests {
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
&Some(query.to_string()),
&None,
crate::TermsMatchingStrategy::default(),
crate::score_details::ScoringStrategy::Skip,
false,

View File

@ -28,6 +28,7 @@ use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use hnsw::Searcher;
use interner::{DedupInterner, Interner};
pub use logger::visual::VisualSearchLogger;
pub use logger::{DefaultSearchLogger, SearchLogger};
@ -39,6 +40,7 @@ use ranking_rules::{
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use space::Neighbor;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
@ -46,7 +48,9 @@ use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
use crate::{
AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32,
};
/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> {
@ -350,6 +354,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
pub fn execute_search(
ctx: &mut SearchContext,
query: &Option<String>,
vector: &Option<Vec<f32>>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
@ -442,6 +447,34 @@ pub fn execute_search(
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let docids = match vector {
Some(vector) => {
// return the nearest documents that are also part of the candidates.
let mut searcher = Searcher::new();
let hnsw = ctx.index.vector_hnsw(ctx.txn)?.unwrap_or_default();
let ef = hnsw.len().min(100);
let mut dest = vec![Neighbor { index: 0, distance: 0 }; ef];
let neighbors = hnsw.nearest(&vector, ef, &mut searcher, &mut dest[..]);
let mut docids = Vec::new();
for Neighbor { index, distance } in neighbors.iter() {
let index = BEU32::new(*index as u32);
let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get();
dbg!(distance, f32::from_bits(*distance));
if universe.contains(docid) {
docids.push(docid);
if docids.len() == length {
break;
}
}
}
docids
}
// return the search docids if the vector field is not specified
None => docids,
};
// The candidates is the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {