mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Introduce codecs for facet types (string, f64, u64, i64)
This commit is contained in:
parent
b4951c058b
commit
e0058c1125
15
src/facet/facet_type.rs
Normal file
15
src/facet/facet_type.rs
Normal file
@ -0,0 +1,15 @@
|
||||
use std::cmp;
|
||||
|
||||
/// The kind of value stored for a facet.
///
/// The declaration order of the variants is significant: the derived `Ord`
/// ranks them `String < F64 < I64 < U64`, and `FacetType::merge` keeps the
/// smaller of two types according to that ranking.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FacetType {
    /// String facet values.
    String,
    /// 64-bit floating point facet values.
    F64,
    /// Signed 64-bit integer facet values.
    I64,
    /// Unsigned 64-bit integer facet values.
    U64,
}
|
||||
|
||||
impl FacetType {
|
||||
pub fn merge(a: FacetType, b: FacetType) -> FacetType {
|
||||
cmp::min(a, b)
|
||||
}
|
||||
}
|
4
src/facet/mod.rs
Normal file
4
src/facet/mod.rs
Normal file
@ -0,0 +1,4 @@
|
||||
mod facet_type;
|
||||
pub mod value_encoding;
|
||||
|
||||
pub use self::facet_type::FacetType;
|
89
src/facet/value_encoding.rs
Normal file
89
src/facet/value_encoding.rs
Normal file
@ -0,0 +1,89 @@
|
||||
// https://stackoverflow.com/a/43305015/1941280
|
||||
#[inline]
|
||||
pub fn f64_into_bytes(float: f64) -> Option<[u8; 8]> {
|
||||
if float.is_finite() {
|
||||
if float == 0.0 || float == -0.0 {
|
||||
return Some(xor_first_bit(0.0_f64.to_be_bytes()));
|
||||
} else if float.is_sign_negative() {
|
||||
return Some(xor_all_bits(float.to_be_bytes()));
|
||||
} else if float.is_sign_positive() {
|
||||
return Some(xor_first_bit(float.to_be_bytes()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the big-endian byte representation of `int`.
#[inline]
pub fn u64_into_bytes(int: u64) -> [u8; 8] {
    u64::to_be_bytes(int)
}
|
||||
|
||||
/// Rebuilds a `u64` from its big-endian byte representation.
#[inline]
pub fn u64_from_bytes(bytes: [u8; 8]) -> u64 {
    u64::from_be_bytes(bytes)
}
|
||||
|
||||
#[inline]
|
||||
pub fn i64_into_bytes(int: i64) -> [u8; 8] {
|
||||
xor_first_bit(int.to_be_bytes())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn i64_from_bytes(bytes: [u8; 8]) -> i64 {
|
||||
i64::from_be_bytes(xor_first_bit(bytes))
|
||||
}
|
||||
|
||||
/// Returns `x` with the most significant bit of its first byte flipped.
#[inline]
fn xor_first_bit(x: [u8; 8]) -> [u8; 8] {
    let mut flipped = x;
    flipped[0] ^= 0b1000_0000;
    flipped
}
|
||||
|
||||
/// Returns `x` with every bit of every byte flipped.
#[inline]
fn xor_all_bits(x: [u8; 8]) -> [u8; 8] {
    let mut inverted = x;
    for byte in inverted.iter_mut() {
        // Bitwise NOT on a `u8` is the same as XOR with `0xff`.
        *byte = !*byte;
    }
    inverted
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns `true` when every element is strictly smaller than its successor.
    fn is_sorted<T: Ord>(x: &[T]) -> bool {
        x.windows(2).all(|pair| pair[0] < pair[1])
    }

    #[test]
    fn ordered_f64_bytes() {
        let floats = [-13_f64, -10.0, -0.0, 1.0, 43.0];

        let vec: Vec<_> = floats.iter().map(|&float| f64_into_bytes(float)).collect();
        assert!(is_sorted(&vec), "{:?}", vec);
    }

    #[test]
    fn ordered_u64_bytes() {
        let ints = [0_u64, 1_u64, 43_u64];

        let vec: Vec<_> = ints.iter().map(|&int| u64_into_bytes(int)).collect();
        assert!(is_sorted(&vec), "{:?}", vec);
    }

    #[test]
    fn ordered_i64_bytes() {
        let ints = [-10_i64, -0_i64, 1_i64, 43_i64];

        let vec: Vec<_> = ints.iter().map(|&int| i64_into_bytes(int)).collect();
        assert!(is_sorted(&vec), "{:?}", vec);
    }
}
|
50
src/heed_codec/facet/facet_value_f64_codec.rs
Normal file
50
src/heed_codec/facet/facet_value_f64_codec.rs
Normal file
@ -0,0 +1,50 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use crate::heed_codec::StrBytesCodec;
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
|
||||
/// Heed codec that encodes and decodes `(&str, f64)` pairs.
pub struct FacetValueF64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueF64Codec {
|
||||
type DItem = (&'a str, f64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (name, buffer) = StrBytesCodec::bytes_decode(bytes)?;
|
||||
let value = buffer[8..].try_into().ok().map(f64::from_be_bytes)?;
|
||||
Some((name, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueF64Codec {
|
||||
type EItem = (&'a str, f64);
|
||||
|
||||
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
// Write the globally ordered float.
|
||||
let bytes = f64_into_bytes(*value)?;
|
||||
buffer[..8].copy_from_slice(&bytes[..]);
|
||||
|
||||
// Then the f64 value just to be able to read it back.
|
||||
let bytes = value.to_be_bytes();
|
||||
buffer[8..].copy_from_slice(&bytes[..]);
|
||||
|
||||
let tuple = (*name, &buffer[..]);
|
||||
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use heed::{BytesDecode, BytesEncode};
    use super::*;

    /// Encoding then decoding a pair must give back the same pair.
    #[test]
    fn globally_ordered_f64() {
        let original = ("hello", -32.0);

        let bytes = FacetValueF64Codec::bytes_encode(&original).unwrap();
        let decoded = FacetValueF64Codec::bytes_decode(&bytes).unwrap();

        assert_eq!(decoded, original);
    }
}
|
28
src/heed_codec/facet/facet_value_i64_codec.rs
Normal file
28
src/heed_codec/facet/facet_value_i64_codec.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use crate::heed_codec::StrBytesCodec;
|
||||
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
|
||||
|
||||
/// Heed codec that encodes and decodes `(&str, i64)` pairs.
pub struct FacetValueI64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueI64Codec {
|
||||
type DItem = (&'a str, i64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?;
|
||||
let value = bytes.try_into().map(i64_from_bytes).ok()?;
|
||||
Some((name, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueI64Codec {
|
||||
type EItem = (&'a str, i64);
|
||||
|
||||
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let value = i64_into_bytes(*value);
|
||||
let tuple = (*name, &value[..]);
|
||||
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned)
|
||||
}
|
||||
}
|
25
src/heed_codec/facet/facet_value_string_codec.rs
Normal file
25
src/heed_codec/facet/facet_value_string_codec.rs
Normal file
@ -0,0 +1,25 @@
|
||||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
use crate::heed_codec::StrBytesCodec;
|
||||
|
||||
/// Heed codec that encodes and decodes `(&str, &str)` pairs.
pub struct FacetValueStringCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
||||
type DItem = (&'a str, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?;
|
||||
let value = str::from_utf8(bytes).ok()?;
|
||||
Some((name, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
|
||||
type EItem = (&'a str, &'a str);
|
||||
|
||||
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let tuple = (*name, value.as_bytes());
|
||||
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned)
|
||||
}
|
||||
}
|
28
src/heed_codec/facet/facet_value_u64_codec.rs
Normal file
28
src/heed_codec/facet/facet_value_u64_codec.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use crate::heed_codec::StrBytesCodec;
|
||||
use crate::facet::value_encoding::{u64_from_bytes, u64_into_bytes};
|
||||
|
||||
/// Heed codec that encodes and decodes `(&str, u64)` pairs.
pub struct FacetValueU64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueU64Codec {
|
||||
type DItem = (&'a str, u64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?;
|
||||
let value = bytes.try_into().map(u64_from_bytes).ok()?;
|
||||
Some((name, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueU64Codec {
|
||||
type EItem = (&'a str, u64);
|
||||
|
||||
fn bytes_encode((name, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let value = u64_into_bytes(*value);
|
||||
let tuple = (*name, &value[..]);
|
||||
StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned)
|
||||
}
|
||||
}
|
9
src/heed_codec/facet/mod.rs
Normal file
9
src/heed_codec/facet/mod.rs
Normal file
@ -0,0 +1,9 @@
|
||||
mod facet_value_f64_codec;
|
||||
mod facet_value_i64_codec;
|
||||
mod facet_value_string_codec;
|
||||
mod facet_value_u64_codec;
|
||||
|
||||
pub use self::facet_value_f64_codec::FacetValueF64Codec;
|
||||
pub use self::facet_value_i64_codec::FacetValueI64Codec;
|
||||
pub use self::facet_value_string_codec::FacetValueStringCodec;
|
||||
pub use self::facet_value_u64_codec::FacetValueU64Codec;
|
@ -1,8 +1,10 @@
|
||||
mod beu32_str_codec;
|
||||
mod bo_roaring_bitmap_codec;
|
||||
mod cbo_roaring_bitmap_codec;
|
||||
mod facet;
|
||||
mod obkv_codec;
|
||||
mod roaring_bitmap_codec;
|
||||
mod str_bytes_codec;
|
||||
mod str_str_u8_codec;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
@ -10,4 +12,5 @@ pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
||||
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
||||
pub use self::obkv_codec::ObkvCodec;
|
||||
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
||||
pub use self::str_bytes_codec::StrBytesCodec;
|
||||
pub use self::str_str_u8_codec::StrStrU8Codec;
|
||||
|
28
src/heed_codec/str_bytes_codec.rs
Normal file
28
src/heed_codec/str_bytes_codec.rs
Normal file
@ -0,0 +1,28 @@
|
||||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
/// Heed codec for a `(&str, &[u8])` pair, stored as the string's bytes, a single zero byte, then the raw bytes.
pub struct StrBytesCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrBytesCodec {
|
||||
type DItem = (&'a str, &'a [u8]);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, s2_bytes) = bytes.split_at(s1_end);
|
||||
let s1 = str::from_utf8(s1_bytes).ok()?;
|
||||
let s2 = &s2_bytes[1..];
|
||||
Some((s1, s2))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBytesCodec {
|
||||
type EItem = (&'a str, &'a [u8]);
|
||||
|
||||
fn bytes_encode((s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2);
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
@ -4,6 +4,7 @@ mod index;
|
||||
mod mdfs;
|
||||
mod query_tokens;
|
||||
mod search;
|
||||
pub mod facet;
|
||||
pub mod heed_codec;
|
||||
pub mod proximity;
|
||||
pub mod subcommand;
|
||||
|
Loading…
Reference in New Issue
Block a user