diff --git a/milli/src/distance.rs b/milli/src/distance.rs index 1b91b4654..c838e4bd4 100644 --- a/milli/src/distance.rs +++ b/milli/src/distance.rs @@ -7,10 +7,10 @@ pub struct DotProduct; impl Metric> for DotProduct { type Unit = u32; - // TODO explain me this function, I don't understand why f32.to_bits is ordered. - // I tried to do this and it wasn't OK - // // Following . + // + // Here is a playground that validates the ordering of the bit representation of floats in range 0.0..=1.0: + // fn distance(&self, a: &Vec, b: &Vec) -> Self::Unit { let dist = 1.0 - dot_product_similarity(a, b); debug_assert!(!dist.is_nan()); @@ -23,22 +23,3 @@ impl Metric> for DotProduct { pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 { a.iter().zip(b).map(|(a, b)| a * b).sum() } - -#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] -pub struct Euclidean; - -impl Metric> for Euclidean { - type Unit = u32; - - fn distance(&self, a: &Vec, b: &Vec) -> Self::Unit { - let dist = euclidean_squared_distance(a, b).sqrt(); - debug_assert!(!dist.is_nan()); - dist.to_bits() - } -} - -/// Return the squared euclidean distance between both vectors that will -/// between 0.0 and +inf. The smaller the nearer the vectors are. 
-pub fn euclidean_squared_distance(a: &[f32], b: &[f32]) -> f32 { - a.iter().zip(b).map(|(a, b)| (a - b).powi(2)).sum() -} diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 626c30ab0..99126f60e 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -32,7 +32,7 @@ use std::convert::{TryFrom, TryInto}; use std::hash::BuildHasherDefault; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; -pub use distance::{dot_product_similarity, euclidean_squared_distance}; +pub use distance::dot_product_similarity; pub use filter_parser::{Condition, FilterCondition, Span, Token}; use fxhash::{FxHasher32, FxHasher64}; pub use grenad::CompressionType; @@ -304,7 +304,7 @@ impl VectorOrArrayOfVectors { } } -/// Normalize a vector by dividing the dimensions by the lenght of it. +/// Normalize a vector by dividing the dimensions by the length of it. pub fn normalize_vector(mut vector: Vec) -> Vec { let squared: f32 = vector.iter().map(|x| x * x).sum(); let length = squared.sqrt();