Remove the useless euclidean distance implementation

This commit is contained in:
Clément Renault 2023-06-27 12:29:40 +02:00
parent 29d8268c94
commit ebad1f396f
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
2 changed files with 5 additions and 24 deletions

View File

@ -7,10 +7,10 @@ pub struct DotProduct;
impl Metric<Vec<f32>> for DotProduct { impl Metric<Vec<f32>> for DotProduct {
type Unit = u32; type Unit = u32;
// TODO explain me this function, I don't understand why f32.to_bits is ordered.
// I tried to do this and it wasn't OK <https://stackoverflow.com/a/43305015/1941280>
//
// Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>. // Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
//
// Here is a playground that validate the ordering of the bit representation of floats in range 0.0..=1.0:
// <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit { fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
let dist = 1.0 - dot_product_similarity(a, b); let dist = 1.0 - dot_product_similarity(a, b);
debug_assert!(!dist.is_nan()); debug_assert!(!dist.is_nan());
@ -23,22 +23,3 @@ impl Metric<Vec<f32>> for DotProduct {
pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 { pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b).map(|(a, b)| a * b).sum() a.iter().zip(b).map(|(a, b)| a * b).sum()
} }
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
pub struct Euclidean;
impl Metric<Vec<f32>> for Euclidean {
type Unit = u32;
fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
let dist = euclidean_squared_distance(a, b).sqrt();
debug_assert!(!dist.is_nan());
dist.to_bits()
}
}
/// Return the squared euclidean distance between both vectors that will
/// between 0.0 and +inf. The smaller the nearer the vectors are.
pub fn euclidean_squared_distance(a: &[f32], b: &[f32]) -> f32 {
a.iter().zip(b).map(|(a, b)| (a - b).powi(2)).sum()
}

View File

@ -32,7 +32,7 @@ use std::convert::{TryFrom, TryInto};
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
pub use distance::{dot_product_similarity, euclidean_squared_distance}; pub use distance::dot_product_similarity;
pub use filter_parser::{Condition, FilterCondition, Span, Token}; pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64}; use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType; pub use grenad::CompressionType;
@ -304,7 +304,7 @@ impl VectorOrArrayOfVectors {
} }
} }
/// Normalize a vector by dividing the dimensions by the lenght of it. /// Normalize a vector by dividing the dimensions by the length of it.
pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> { pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
let squared: f32 = vector.iter().map(|x| x * x).sum(); let squared: f32 = vector.iter().map(|x| x * x).sum();
let length = squared.sqrt(); let length = squared.sqrt();