Move the proximity function to a module

This commit is contained in:
Clément Renault 2020-09-22 10:53:20 +02:00
parent 15208c7d3d
commit e34437b2d7
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 31 additions and 28 deletions

View File

@ -2,6 +2,7 @@ mod criterion;
mod query_tokens; mod query_tokens;
mod search; mod search;
pub mod heed_codec; pub mod heed_codec;
pub mod proximity;
pub mod tokenizer; pub mod tokenizer;
use std::collections::HashMap; use std::collections::HashMap;

28
src/proximity.rs Normal file
View File

@ -0,0 +1,28 @@
use std::cmp;
use crate::{Attribute, Position};
const ONE_ATTRIBUTE: u32 = 1000;
/// Upper bound on the proximity that can be reported between two positions.
const MAX_DISTANCE: u32 = 8;

/// Computes the proximity between two indexes, capped at `MAX_DISTANCE`.
///
/// When `lhs <= rhs` the proximity is the plain difference `rhs - lhs`.
/// When `lhs > rhs` the difference `lhs - rhs` is increased by one before
/// being capped, so a pair given in reverse order scores one more than the
/// same pair given in order.
///
/// This function never panics: the `+ 1` of the reversed case saturates
/// instead of overflowing when the two indexes are `u32::MAX` apart.
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    let distance = if lhs <= rhs {
        rhs - lhs
    } else {
        // `lhs - rhs` can be as large as `u32::MAX`; a plain `+ 1` would
        // overflow there (panic in debug builds, wrap to 0 in release).
        (lhs - rhs).saturating_add(1)
    };

    cmp::min(distance, MAX_DISTANCE)
}
/// Computes the proximity between two positions.
///
/// Each position is split with `extract_position` into an attribute and an
/// index. Positions that belong to different attributes are always
/// `MAX_DISTANCE` apart; otherwise the result is the `index_proximity` of
/// the two indexes.
pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
    let (left_attribute, left_index) = extract_position(lhs);
    let (right_attribute, right_index) = extract_position(rhs);

    if left_attribute == right_attribute {
        index_proximity(left_index, right_index)
    } else {
        MAX_DISTANCE
    }
}
/// Splits a position into its `(attribute, index)` pair.
///
/// The attribute is the quotient of the position by `ONE_ATTRIBUTE` and the
/// index is the remainder of that same division.
pub fn extract_position(position: Position) -> (Attribute, Position) {
    let attribute = position / ONE_ATTRIBUTE;
    let index = position % ONE_ATTRIBUTE;
    (attribute, index)
}
/// Computes the total proximity of a path of positions.
///
/// The result is the sum of `positions_proximity` over every pair of
/// consecutive positions in `path`. A path with fewer than two positions
/// has no pair and therefore a proximity of zero.
pub fn path_proximity(path: &[Position]) -> u32 {
    let mut total: u32 = 0;
    for pair in path.windows(2) {
        total += positions_proximity(pair[0], pair[1]);
    }
    total
}

View File

@ -10,6 +10,7 @@ use roaring::bitmap::{IntoIter, RoaringBitmap};
use near_proximity::near_proximity; use near_proximity::near_proximity;
use crate::proximity::path_proximity;
use crate::query_tokens::{QueryTokens, QueryToken}; use crate::query_tokens::{QueryTokens, QueryToken};
use crate::{Index, DocumentId, Position}; use crate::{Index, DocumentId, Position};
@ -194,33 +195,6 @@ impl<'a> Search<'a> {
let mut documents = Vec::new(); let mut documents = Vec::new();
// TODO move this function elsewhere
/// Computes the total proximity of a path of positions: the sum of the
/// proximities of every pair of consecutive positions.
fn compute_proximity(path: &[Position]) -> u32 {
    const ONE_ATTRIBUTE: u32 = 1000;
    const MAX_DISTANCE: u32 = 8;

    /// Splits a position into its `(attribute, index)` pair.
    fn extract_position(position: u32) -> (u32, u32) {
        let attribute = position / ONE_ATTRIBUTE;
        let index = position % ONE_ATTRIBUTE;
        (attribute, index)
    }

    /// Proximity between two indexes, capped at `MAX_DISTANCE`; a reversed
    /// pair (`lhs > rhs`) scores one more than its difference.
    fn index_proximity(lhs: u32, rhs: u32) -> u32 {
        let distance = if lhs <= rhs { rhs - lhs } else { (lhs - rhs) + 1 };
        cmp::min(distance, MAX_DISTANCE)
    }

    /// Proximity between two positions; positions in different attributes
    /// are always `MAX_DISTANCE` apart.
    fn positions_proximity(lhs: u32, rhs: u32) -> u32 {
        let (left_attribute, left_index) = extract_position(lhs);
        let (right_attribute, right_index) = extract_position(rhs);

        if left_attribute == right_attribute {
            index_proximity(left_index, right_index)
        } else {
            MAX_DISTANCE
        }
    }

    let mut total: u32 = 0;
    for pair in path.windows(2) {
        total += positions_proximity(pair[0], pair[1]);
    }
    total
}
// If there only is one word, no need to compute the best proximities. // If there only is one word, no need to compute the best proximities.
if derived_words.len() == 1 { if derived_words.len() == 1 {
let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect(); let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect();
@ -231,7 +205,7 @@ impl<'a> Search<'a> {
let mut paths = Vec::new(); let mut paths = Vec::new();
for candidate in candidates { for candidate in candidates {
let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?; let keywords = Self::fecth_keywords(rtxn, index, &derived_words, candidate)?;
near_proximity(keywords, &mut paths, compute_proximity); near_proximity(keywords, &mut paths, path_proximity);
if let Some((prox, _path)) = paths.first() { if let Some((prox, _path)) = paths.first() {
documents.push((*prox, candidate)); documents.push((*prox, candidate));
} }