Remove the useless euclidean distance implementation

2024-11-26 12:05:05 +08:00 · 2023-06-27 12:29:40 +02:00 · 2023-06-27 12:29:40 +02:00 · ebad1f396f
commit ebad1f396f
parent 29d8268c94
2 changed files with 5 additions and 24 deletions
--- a/milli/src/distance.rs
+++ b/milli/src/distance.rs
@ -7,10 +7,10 @@ pub struct DotProduct;
 impl Metric<Vec<f32>> for DotProduct {
    type Unit = u32;
    // TODO explain me this function, I don't understand why f32.to_bits is ordered.
    // I tried to do this and it wasn't OK <https://stackoverflow.com/a/43305015/1941280>
    //
    // Following <https://docs.rs/space/0.17.0/space/trait.Metric.html>.
    //
    // Here is a playground that validates the ordering of the bit representation of floats in range 0.0..=1.0:
    // <https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=6c59e31a3cc5036b32edf51e8937b56e>
    fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
        let dist = 1.0 - dot_product_similarity(a, b);
        debug_assert!(!dist.is_nan());
@ -23,22 +23,3 @@ impl Metric<Vec<f32>> for DotProduct {
 /// Computes the dot product of `a` and `b`.
 ///
 /// Components are paired positionally; when the slices differ in length,
 /// the surplus components of the longer one are ignored.
 pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
    let products = a.iter().zip(b.iter()).map(|(x, y)| x * y);
    products.sum()
 }
 /// Unit marker type selecting the euclidean metric; it carries no data and
 /// its behavior comes from its `Metric<Vec<f32>>` implementation.
 #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
 pub struct Euclidean;
 impl Metric<Vec<f32>> for Euclidean {
    type Unit = u32;

    /// Returns the euclidean distance between `a` and `b`, encoded as the raw
    /// bit pattern of the resulting `f32`.
    fn distance(&self, a: &Vec<f32>, b: &Vec<f32>) -> Self::Unit {
        let squared = euclidean_squared_distance(a, b);
        let root = squared.sqrt();
        // A NaN has no meaningful position in an ordering; this is only
        // checked in debug builds.
        debug_assert!(!root.is_nan());
        // NOTE(review): presumably relies on the bit patterns of non-negative
        // floats sorting in the same order as the values themselves.
        root.to_bits()
    }
 }
 /// Returns the squared euclidean distance between both vectors, a value
 /// between 0.0 and +inf. The smaller it is, the nearer the vectors are.
 ///
 /// Components are paired positionally; when the slices differ in length,
 /// the surplus components of the longer one are ignored.
 pub fn euclidean_squared_distance(a: &[f32], b: &[f32]) -> f32 {
    let squared_diffs = a.iter().zip(b.iter()).map(|(x, y)| {
        let diff = x - y;
        diff.powi(2)
    });
    squared_diffs.sum()
 }
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -32,7 +32,7 @@ use std::convert::{TryFrom, TryInto};
 use std::hash::BuildHasherDefault;
 use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
-pub use distance::{dot_product_similarity, euclidean_squared_distance};
+pub use distance::dot_product_similarity;
 pub use filter_parser::{Condition, FilterCondition, Span, Token};
 use fxhash::{FxHasher32, FxHasher64};
 pub use grenad::CompressionType;
@ -304,7 +304,7 @@ impl VectorOrArrayOfVectors {
    }
 }
-/// Normalize a vector by dividing the dimensions by the lenght of it.
+/// Normalize a vector by dividing the dimensions by the length of it.
 pub fn normalize_vector(mut vector: Vec<f32>) -> Vec<f32> {
    let squared: f32 = vector.iter().map(|x| x * x).sum();
    let length = squared.sqrt();