mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Move the proximity function to a module
This commit is contained in:
parent
15208c7d3d
commit
e34437b2d7
@ -2,6 +2,7 @@ mod criterion;
|
|||||||
mod query_tokens;
|
mod query_tokens;
|
||||||
mod search;
|
mod search;
|
||||||
pub mod heed_codec;
|
pub mod heed_codec;
|
||||||
|
pub mod proximity;
|
||||||
pub mod tokenizer;
|
pub mod tokenizer;
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
28
src/proximity.rs
Normal file
28
src/proximity.rs
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
use std::cmp;
|
||||||
|
use crate::{Attribute, Position};
|
||||||
|
|
||||||
|
/// Divisor used by `extract_position` to split a position: the quotient is
/// returned as the attribute and the remainder as the index inside it.
const ONE_ATTRIBUTE: u32 = 1000;

/// Upper bound on the proximity reported for a single pair of positions.
const MAX_DISTANCE: u32 = 8;

/// Returns the proximity between two indexes, capped at `MAX_DISTANCE`.
///
/// When `lhs` is not after `rhs` the proximity is the plain difference
/// `rhs - lhs`. When `lhs` comes after `rhs` the difference is increased by
/// one, so a backward pair always costs more than the same forward pair.
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs {
        cmp::min(rhs - lhs, MAX_DISTANCE)
    } else {
        // `lhs - rhs` can be `u32::MAX` (e.g. `lhs == u32::MAX`, `rhs == 0`);
        // a plain `+ 1` would then overflow. Saturating keeps the result
        // clamped to `MAX_DISTANCE` as intended.
        cmp::min((lhs - rhs).saturating_add(1), MAX_DISTANCE)
    }
}
|
||||||
|
|
||||||
|
pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
|
||||||
|
let (lhs_attr, lhs_index) = extract_position(lhs);
|
||||||
|
let (rhs_attr, rhs_index) = extract_position(rhs);
|
||||||
|
if lhs_attr != rhs_attr { MAX_DISTANCE }
|
||||||
|
else { index_proximity(lhs_index, rhs_index) }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extract_position(position: Position) -> (Attribute, Position) {
|
||||||
|
(position / ONE_ATTRIBUTE, position % ONE_ATTRIBUTE)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn path_proximity(path: &[Position]) -> u32 {
|
||||||
|
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
|
||||||
|
}
|
@ -10,6 +10,7 @@ use roaring::bitmap::{IntoIter, RoaringBitmap};
|
|||||||
|
|
||||||
use near_proximity::near_proximity;
|
use near_proximity::near_proximity;
|
||||||
|
|
||||||
|
use crate::proximity::path_proximity;
|
||||||
use crate::query_tokens::{QueryTokens, QueryToken};
|
use crate::query_tokens::{QueryTokens, QueryToken};
|
||||||
use crate::{Index, DocumentId, Position};
|
use crate::{Index, DocumentId, Position};
|
||||||
|
|
||||||
@ -194,33 +195,6 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
|
|
||||||
// TODO move this function elsewhere
|
|
||||||
fn compute_proximity(path: &[Position]) -> u32 {
|
|
||||||
const ONE_ATTRIBUTE: u32 = 1000;
|
|
||||||
const MAX_DISTANCE: u32 = 8;
|
|
||||||
|
|
||||||
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
|
||||||
if lhs <= rhs {
|
|
||||||
cmp::min(rhs - lhs, MAX_DISTANCE)
|
|
||||||
} else {
|
|
||||||
cmp::min((lhs - rhs) + 1, MAX_DISTANCE)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn positions_proximity(lhs: u32, rhs: u32) -> u32 {
|
|
||||||
let (lhs_attr, lhs_index) = extract_position(lhs);
|
|
||||||
let (rhs_attr, rhs_index) = extract_position(rhs);
|
|
||||||
if lhs_attr != rhs_attr { MAX_DISTANCE }
|
|
||||||
else { index_proximity(lhs_index, rhs_index) }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn extract_position(position: u32) -> (u32, u32) {
|
|
||||||
(position / ONE_ATTRIBUTE, position % ONE_ATTRIBUTE)
|
|
||||||
}
|
|
||||||
|
|
||||||
path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there only is one word, no need to compute the best proximities.
|
// If there only is one word, no need to compute the best proximities.
|
||||||
if derived_words.len() == 1 {
|
if derived_words.len() == 1 {
|
||||||
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
|
||||||
@ -231,7 +205,7 @@ impl<'a> Search<'a> {
|
|||||||
let mut paths = Vec::new();
|
let mut paths = Vec::new();
|
||||||
for candidate in candidates {
|
for candidate in candidates {
|
||||||
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
|
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
|
||||||
near_proximity(keywords, &mut paths, compute_proximity);
|
near_proximity(keywords, &mut paths, path_proximity);
|
||||||
if let Some((prox, _path)) = paths.first() {
|
if let Some((prox, _path)) = paths.first() {
|
||||||
documents.push((*prox, candidate));
|
documents.push((*prox, candidate));
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user