Merge pull request #146 from meilisearch/facet-float-integer-becomes-number

Facet float-integer becomes facet number
This commit is contained in:
Clément Renault 2021-04-20 10:31:47 +02:00 committed by GitHub
commit 726fcf015a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 258 additions and 551 deletions

1
Cargo.lock generated
View File

@ -1272,7 +1272,6 @@ dependencies = [
"maplit", "maplit",
"meilisearch-tokenizer", "meilisearch-tokenizer",
"memmap", "memmap",
"num-traits",
"obkv", "obkv",
"once_cell", "once_cell",
"ordered-float", "ordered-float",

View File

@ -274,15 +274,12 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
facet_type: milli::facet::FacetType, facet_type: milli::facet::FacetType,
string_fn: impl Fn(&str) -> T + 'txn, string_fn: impl Fn(&str) -> T + 'txn,
float_fn: impl Fn(u8, f64, f64) -> T + 'txn, float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
) -> heed::Result<Box<dyn Iterator<Item=heed::Result<(T, DC::DItem)>> + 'txn>> ) -> heed::Result<Box<dyn Iterator<Item=heed::Result<(T, DC::DItem)>> + 'txn>>
where where
DC: heed::BytesDecode<'txn>, DC: heed::BytesDecode<'txn>,
{ {
use milli::facet::FacetType; use milli::facet::FacetType;
use milli::heed_codec::facet::{ use milli::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
};
let iter = db.prefix_iter(&rtxn, &[field_id])?; let iter = db.prefix_iter(&rtxn, &[field_id])?;
match facet_type { match facet_type {
@ -291,20 +288,13 @@ where
.map(move |r| r.map(|((_, key), value)| (string_fn(key), value))); .map(move |r| r.map(|((_, key), value)| (string_fn(key), value)));
Ok(Box::new(iter) as Box<dyn Iterator<Item=_>>) Ok(Box::new(iter) as Box<dyn Iterator<Item=_>>)
}, },
FacetType::Float => { FacetType::Number => {
let iter = iter.remap_key_type::<FacetLevelValueF64Codec>() let iter = iter.remap_key_type::<FacetLevelValueF64Codec>()
.map(move |r| r.map(|((_, level, left, right), value)| { .map(move |r| r.map(|((_, level, left, right), value)| {
(float_fn(level, left, right), value) (float_fn(level, left, right), value)
})); }));
Ok(Box::new(iter)) Ok(Box::new(iter))
}, },
FacetType::Integer => {
let iter = iter.remap_key_type::<FacetLevelValueI64Codec>()
.map(move |r| r.map(|((_, level, left, right), value)| {
(integer_fn(level, left, right), value)
}));
Ok(Box::new(iter))
},
} }
} }
@ -413,11 +403,6 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
let _ = write!(&mut output, " (level {})", level); let _ = write!(&mut output, " (level {})", level);
output output
}, },
|level, left, right| {
let mut output = facet_number_value_to_string(level, left, right).1;
let _ = write!(&mut output, " (level {})", level);
output
},
)?; )?;
for result in iter { for result in iter {
@ -523,7 +508,6 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam
*field_type, *field_type,
|key| (0, key.to_owned()), |key| (0, key.to_owned()),
facet_number_value_to_string, facet_number_value_to_string,
facet_number_value_to_string,
)?; )?;
for result in iter { for result in iter {
@ -590,7 +574,6 @@ fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow:
*field_type, *field_type,
|_key| 0u8, |_key| 0u8,
|level, _left, _right| level, |level, _left, _right| level,
|level, _left, _right| level,
)?; )?;
println!("The database {:?} facet stats", field_name); println!("The database {:?} facet stats", field_name);

View File

@ -22,7 +22,6 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
linked-hash-map = "0.5.4" linked-hash-map = "0.5.4"
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.1" } meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.1" }
memmap = "0.7.0" memmap = "0.7.0"
num-traits = "0.2.14"
obkv = "0.1.1" obkv = "0.1.1"
once_cell = "1.5.2" once_cell = "1.5.2"
ordered-float = "2.1.1" ordered-float = "2.1.1"

View File

@ -8,16 +8,14 @@ use serde::{Serialize, Deserialize};
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub enum FacetType { pub enum FacetType {
String, String,
Float, Number,
Integer,
} }
impl fmt::Display for FacetType { impl fmt::Display for FacetType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self { match self {
FacetType::String => f.write_str("string"), FacetType::String => f.write_str("string"),
FacetType::Float => f.write_str("float"), FacetType::Number => f.write_str("number"),
FacetType::Integer => f.write_str("integer"),
} }
} }
} }
@ -26,12 +24,10 @@ impl FromStr for FacetType {
type Err = InvalidFacetType; type Err = InvalidFacetType;
fn from_str(s: &str) -> Result<Self, Self::Err> { fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.eq_ignore_ascii_case("string") { if s.trim().eq_ignore_ascii_case("string") {
Ok(FacetType::String) Ok(FacetType::String)
} else if s.eq_ignore_ascii_case("float") { } else if s.trim().eq_ignore_ascii_case("number") {
Ok(FacetType::Float) Ok(FacetType::Number)
} else if s.eq_ignore_ascii_case("integer") {
Ok(FacetType::Integer)
} else { } else {
Err(InvalidFacetType) Err(InvalidFacetType)
} }
@ -43,7 +39,7 @@ pub struct InvalidFacetType;
impl fmt::Display for InvalidFacetType { impl fmt::Display for InvalidFacetType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str(r#"Invalid facet type, must be "string", "float" or "integer""#) f.write_str(r#"Invalid facet type, must be "string" or "number""#)
} }
} }

View File

@ -4,8 +4,7 @@ use serde::{Serialize, Serializer};
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum FacetValue { pub enum FacetValue {
String(String), String(String),
Float(OrderedFloat<f64>), Number(OrderedFloat<f64>),
Integer(i64),
} }
impl From<String> for FacetValue { impl From<String> for FacetValue {
@ -22,24 +21,25 @@ impl From<&str> for FacetValue {
impl From<f64> for FacetValue { impl From<f64> for FacetValue {
fn from(float: f64) -> FacetValue { fn from(float: f64) -> FacetValue {
FacetValue::Float(OrderedFloat(float)) FacetValue::Number(OrderedFloat(float))
} }
} }
impl From<OrderedFloat<f64>> for FacetValue { impl From<OrderedFloat<f64>> for FacetValue {
fn from(float: OrderedFloat<f64>) -> FacetValue { fn from(float: OrderedFloat<f64>) -> FacetValue {
FacetValue::Float(float) FacetValue::Number(float)
} }
} }
impl From<i64> for FacetValue { impl From<i64> for FacetValue {
fn from(integer: i64) -> FacetValue { fn from(integer: i64) -> FacetValue {
FacetValue::Integer(integer) FacetValue::Number(OrderedFloat(integer as f64))
} }
} }
/// We implement Serialize ourselves because we need to always serialize it as a string, /// We implement Serialize ourselves because we need to always serialize it as a string,
/// JSON object keys must be strings not numbers. /// JSON object keys must be strings not numbers.
// TODO remove this impl and convert them into string, by hand, when required.
impl Serialize for FacetValue { impl Serialize for FacetValue {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where where
@ -47,12 +47,8 @@ impl Serialize for FacetValue {
{ {
match self { match self {
FacetValue::String(string) => serializer.serialize_str(string), FacetValue::String(string) => serializer.serialize_str(string),
FacetValue::Float(float) => { FacetValue::Number(number) => {
let string = float.to_string(); let string = number.to_string();
serializer.serialize_str(&string)
},
FacetValue::Integer(integer) => {
let string = integer.to_string();
serializer.serialize_str(&string) serializer.serialize_str(&string)
}, },
} }

View File

@ -13,16 +13,6 @@ pub fn f64_into_bytes(float: f64) -> Option<[u8; 8]> {
None None
} }
#[inline]
pub fn i64_into_bytes(int: i64) -> [u8; 8] {
xor_first_bit(int.to_be_bytes())
}
#[inline]
pub fn i64_from_bytes(bytes: [u8; 8]) -> i64 {
i64::from_be_bytes(xor_first_bit(bytes))
}
#[inline] #[inline]
fn xor_first_bit(mut x: [u8; 8]) -> [u8; 8] { fn xor_first_bit(mut x: [u8; 8]) -> [u8; 8] {
x[0] ^= 0x80; x[0] ^= 0x80;
@ -55,15 +45,4 @@ mod tests {
let vec: Vec<_> = [a, b, c, d, e].iter().cloned().map(f64_into_bytes).collect(); let vec: Vec<_> = [a, b, c, d, e].iter().cloned().map(f64_into_bytes).collect();
assert!(is_sorted(&vec), "{:?}", vec); assert!(is_sorted(&vec), "{:?}", vec);
} }
#[test]
fn ordered_i64_bytes() {
let a = -10_i64;
let b = -0_i64;
let c = 1_i64;
let d = 43_i64;
let vec: Vec<_> = [a, b, c, d].iter().cloned().map(i64_into_bytes).collect();
assert!(is_sorted(&vec), "{:?}", vec);
}
} }

View File

@ -1,44 +0,0 @@
use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
use crate::FieldId;
pub struct FacetLevelValueI64Codec;
impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
type DItem = (FieldId, u8, i64, i64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
let (level, bytes) = bytes.split_first()?;
let left = bytes[..8].try_into().map(i64_from_bytes).ok()?;
let right = if *level != 0 {
bytes[8..].try_into().map(i64_from_bytes).ok()?
} else {
left
};
Some((*field_id, *level, left, right))
}
}
impl heed::BytesEncode<'_> for FacetLevelValueI64Codec {
type EItem = (FieldId, u8, i64, i64);
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
let left = i64_into_bytes(*left);
let right = i64_into_bytes(*right);
let mut bytes = Vec::with_capacity(2 + left.len() + right.len());
bytes.push(*field_id);
bytes.push(*level);
bytes.extend_from_slice(&left[..]);
if *level != 0 {
bytes.extend_from_slice(&right[..]);
}
Some(Cow::Owned(bytes))
}
}

View File

@ -1,34 +0,0 @@
use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::{i64_into_bytes, i64_from_bytes};
use crate::{FieldId, DocumentId};
pub struct FieldDocIdFacetI64Codec;
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetI64Codec {
type DItem = (FieldId, DocumentId, i64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
let (document_id_bytes, bytes) = bytes.split_at(4);
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
let value = bytes[..8].try_into().map(i64_from_bytes).ok()?;
Some((*field_id, document_id, value))
}
}
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetI64Codec {
type EItem = (FieldId, DocumentId, i64);
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(1 + 4 + 8);
bytes.push(*field_id);
bytes.extend_from_slice(&document_id.to_be_bytes());
bytes.extend_from_slice(&i64_into_bytes(*value));
Some(Cow::Owned(bytes))
}
}

View File

@ -1,13 +1,9 @@
mod facet_level_value_f64_codec; mod facet_level_value_f64_codec;
mod facet_level_value_i64_codec;
mod facet_value_string_codec; mod facet_value_string_codec;
mod field_doc_id_facet_f64_codec; mod field_doc_id_facet_f64_codec;
mod field_doc_id_facet_i64_codec;
mod field_doc_id_facet_string_codec; mod field_doc_id_facet_string_codec;
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
pub use self::facet_level_value_i64_codec::FacetLevelValueI64Codec;
pub use self::facet_value_string_codec::FacetValueStringCodec; pub use self::facet_value_string_codec::FacetValueStringCodec;
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
pub use self::field_doc_id_facet_i64_codec::FieldDocIdFacetI64Codec;
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;

View File

@ -2,16 +2,13 @@ use std::collections::HashMap;
use std::mem::take; use std::mem::take;
use anyhow::{bail, Context as _}; use anyhow::{bail, Context as _};
use heed::{BytesDecode, BytesEncode};
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use num_traits::Bounded;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec}; use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::heed_codec::facet::{FieldDocIdFacetI64Codec, FieldDocIdFacetF64Codec};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::FacetIter; use crate::search::facet::FacetIter;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
@ -253,33 +250,17 @@ fn facet_ordered<'t>(
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> ) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>>
{ {
match facet_type { match facet_type {
FacetType::Float => { FacetType::Number => {
if candidates.len() <= CANDIDATES_THRESHOLD { if candidates.len() <= CANDIDATES_THRESHOLD {
let iter = iterative_facet_ordered_iter::<FieldDocIdFacetF64Codec, f64, OrderedFloat<f64>>( let iter = iterative_facet_ordered_iter(
index, rtxn, field_id, ascending, candidates, index, rtxn, field_id, ascending, candidates,
)?; )?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>) Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
} else { } else {
let facet_fn = if ascending { let facet_fn = if ascending {
FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing FacetIter::new_reducing
} else { } else {
FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing FacetIter::new_reverse_reducing
};
let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
}
},
FacetType::Integer => {
if candidates.len() <= CANDIDATES_THRESHOLD {
let iter = iterative_facet_ordered_iter::<FieldDocIdFacetI64Codec, i64, i64>(
index, rtxn, field_id, ascending, candidates,
)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
} else {
let facet_fn = if ascending {
FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing
} else {
FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing
}; };
let iter = facet_fn(rtxn, index, field_id, candidates)?; let iter = facet_fn(rtxn, index, field_id, candidates)?;
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
@ -292,28 +273,23 @@ fn facet_ordered<'t>(
/// Fetch the whole list of candidates facet values one by one and order them by it. /// Fetch the whole list of candidates facet values one by one and order them by it.
/// ///
/// This function is fast when the amount of candidates to rank is small. /// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_ordered_iter<'t, KC, T, U>( fn iterative_facet_ordered_iter<'t>(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
field_id: FieldId, field_id: FieldId,
ascending: bool, ascending: bool,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't> ) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't>
where
KC: BytesDecode<'t, DItem = (FieldId, u32, T)>,
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u32, T)>,
T: Bounded,
U: From<T> + Ord + Clone + 't,
{ {
let db = index.field_id_docid_facet_values.remap_key_type::<KC>(); let db = index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>();
let mut docids_values = Vec::with_capacity(candidates.len() as usize); let mut docids_values = Vec::with_capacity(candidates.len() as usize);
for docid in candidates.iter() { for docid in candidates.iter() {
let left = (field_id, docid, T::min_value()); let left = (field_id, docid, f64::MIN);
let right = (field_id, docid, T::max_value()); let right = (field_id, docid, f64::MAX);
let mut iter = db.range(rtxn, &(left..=right))?; let mut iter = db.range(rtxn, &(left..=right))?;
let entry = if ascending { iter.next() } else { iter.last() }; let entry = if ascending { iter.next() } else { iter.last() };
if let Some(((_, _, value), ())) = entry.transpose()? { if let Some(((_, _, value), ())) = entry.transpose()? {
docids_values.push((docid, U::from(value))); docids_values.push((docid, OrderedFloat(value)));
} }
} }
docids_values.sort_unstable_by_key(|(_, v)| v.clone()); docids_values.sort_unstable_by_key(|(_, v)| v.clone());

View File

@ -81,28 +81,7 @@ impl<'a> FacetDistinctIter<'a> {
Ok(()) Ok(())
} }
fn distinct_integer(&mut self, id: DocumentId) -> anyhow::Result<()> { fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> {
let iter = get_facet_values::<FieldDocIdFacetI64Codec>(
id,
self.distinct,
self.index,
self.txn,
)?;
for item in iter {
let ((_, _, value), _) = item?;
// get facet docids on level 0
let key = (self.distinct, 0, value, value);
let facet_docids = self.get_facet_docids::<FacetLevelValueI64Codec>(&key)?;
self.excluded.union_with(&facet_docids);
}
self.excluded.remove(id);
Ok(())
}
fn distinct_float(&mut self, id: DocumentId) -> anyhow::Result<()> {
let iter = get_facet_values::<FieldDocIdFacetF64Codec>(id, let iter = get_facet_values::<FieldDocIdFacetF64Codec>(id,
self.distinct, self.distinct,
self.index, self.index,
@ -134,8 +113,7 @@ impl<'a> FacetDistinctIter<'a> {
Some(id) => { Some(id) => {
match self.facet_type { match self.facet_type {
FacetType::String => self.distinct_string(id)?, FacetType::String => self.distinct_string(id)?,
FacetType::Integer => self.distinct_integer(id)?, FacetType::Number => self.distinct_number(id)?,
FacetType::Float => self.distinct_float(id)?,
}; };
// The first document of each iteration is kept, since the next call to // The first document of each iteration is kept, since the next call to
@ -233,6 +211,5 @@ mod test {
test_facet_distinct!(test_string, "txt", FacetType::String); test_facet_distinct!(test_string, "txt", FacetType::String);
test_facet_distinct!(test_strings, "txts", FacetType::String); test_facet_distinct!(test_strings, "txts", FacetType::String);
test_facet_distinct!(test_int, "cat-int", FacetType::Integer); test_facet_distinct!(test_number, "cat-int", FacetType::Number);
test_facet_distinct!(test_ints, "cat-ints", FacetType::Integer);
} }

View File

@ -5,17 +5,15 @@ use std::str::FromStr;
use anyhow::Context; use anyhow::Context;
use either::Either; use either::Either;
use heed::types::{ByteSlice, DecodeIgnore}; use heed::types::DecodeIgnore;
use log::debug; use log::debug;
use num_traits::Bounded;
use pest::error::{Error as PestError, ErrorVariant}; use pest::error::{Error as PestError, ErrorVariant};
use pest::iterators::{Pair, Pairs}; use pest::iterators::{Pair, Pairs};
use pest::Parser; use pest::Parser;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetValueStringCodec; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec};
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec};
use super::FacetRange; use super::FacetRange;
@ -26,17 +24,17 @@ use self::FacetCondition::*;
use self::FacetNumberOperator::*; use self::FacetNumberOperator::*;
#[derive(Debug, Copy, Clone, PartialEq)] #[derive(Debug, Copy, Clone, PartialEq)]
pub enum FacetNumberOperator<T> { pub enum FacetNumberOperator {
GreaterThan(T), GreaterThan(f64),
GreaterThanOrEqual(T), GreaterThanOrEqual(f64),
Equal(T), Equal(f64),
NotEqual(T), NotEqual(f64),
LowerThan(T), LowerThan(f64),
LowerThanOrEqual(T), LowerThanOrEqual(f64),
Between(T, T), Between(f64, f64),
} }
impl<T> FacetNumberOperator<T> { impl FacetNumberOperator {
/// This method can return two operations in case it must express /// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`). /// an OR operation for the between case (i.e. `TO`).
fn negate(self) -> (Self, Option<Self>) { fn negate(self) -> (Self, Option<Self>) {
@ -78,9 +76,8 @@ impl FacetStringOperator {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum FacetCondition { pub enum FacetCondition {
OperatorI64(FieldId, FacetNumberOperator<i64>),
OperatorF64(FieldId, FacetNumberOperator<f64>),
OperatorString(FieldId, FacetStringOperator), OperatorString(FieldId, FacetStringOperator),
OperatorNumber(FieldId, FacetNumberOperator),
Or(Box<Self>, Box<Self>), Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>), And(Box<Self>, Box<Self>),
} }
@ -173,8 +170,7 @@ impl FacetCondition {
let operator = match ftype { let operator = match ftype {
FacetType::String => OperatorString(fid, FacetStringOperator::equal(value)), FacetType::String => OperatorString(fid, FacetStringOperator::equal(value)),
FacetType::Float => OperatorF64(fid, FacetNumberOperator::Equal(value.parse()?)), FacetType::Number => OperatorNumber(fid, FacetNumberOperator::Equal(value.parse()?)),
FacetType::Integer => OperatorI64(fid, FacetNumberOperator::Equal(value.parse()?)),
}; };
if neg { Ok(operator.negate()) } else { Ok(operator) } if neg { Ok(operator.negate()) } else { Ok(operator) }
@ -267,15 +263,11 @@ impl FacetCondition {
fn negate(self) -> FacetCondition { fn negate(self) -> FacetCondition {
match self { match self {
OperatorI64(fid, op) => match op.negate() {
(op, None) => OperatorI64(fid, op),
(a, Some(b)) => Or(Box::new(OperatorI64(fid, a)), Box::new(OperatorI64(fid, b))),
},
OperatorF64(fid, op) => match op.negate() {
(op, None) => OperatorF64(fid, op),
(a, Some(b)) => Or(Box::new(OperatorF64(fid, a)), Box::new(OperatorF64(fid, b))),
},
OperatorString(fid, op) => OperatorString(fid, op.negate()), OperatorString(fid, op) => OperatorString(fid, op.negate()),
OperatorNumber(fid, op) => match op.negate() {
(op, None) => OperatorNumber(fid, op),
(a, Some(b)) => Or(Box::new(OperatorNumber(fid, a)), Box::new(OperatorNumber(fid, b))),
},
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
} }
@ -293,16 +285,6 @@ impl FacetCondition {
let lvalue = items.next().unwrap(); let lvalue = items.next().unwrap();
let rvalue = items.next().unwrap(); let rvalue = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => {
let lvalue = pest_parse(lvalue)?;
let rvalue = pest_parse(rvalue)?;
Ok(OperatorI64(fid, Between(lvalue, rvalue)))
},
FacetType::Float => {
let lvalue = pest_parse(lvalue)?;
let rvalue = pest_parse(rvalue)?;
Ok(OperatorF64(fid, Between(lvalue, rvalue)))
},
FacetType::String => { FacetType::String => {
Err(PestError::<Rule>::new_from_span( Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
@ -311,6 +293,11 @@ impl FacetCondition {
item_span, item_span,
).into()) ).into())
}, },
FacetType::Number => {
let lvalue = pest_parse(lvalue)?;
let rvalue = pest_parse(rvalue)?;
Ok(OperatorNumber(fid, Between(lvalue, rvalue)))
},
} }
} }
@ -324,9 +311,8 @@ impl FacetCondition {
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => Ok(OperatorI64(fid, Equal(pest_parse(value)?))),
FacetType::Float => Ok(OperatorF64(fid, Equal(pest_parse(value)?))),
FacetType::String => Ok(OperatorString(fid, FacetStringOperator::equal(value.as_str()))), FacetType::String => Ok(OperatorString(fid, FacetStringOperator::equal(value.as_str()))),
FacetType::Number => Ok(OperatorNumber(fid, Equal(pest_parse(value)?))),
} }
} }
@ -341,8 +327,6 @@ impl FacetCondition {
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => Ok(OperatorI64(fid, GreaterThan(pest_parse(value)?))),
FacetType::Float => Ok(OperatorF64(fid, GreaterThan(pest_parse(value)?))),
FacetType::String => { FacetType::String => {
Err(PestError::<Rule>::new_from_span( Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
@ -351,6 +335,7 @@ impl FacetCondition {
item_span, item_span,
).into()) ).into())
}, },
FacetType::Number => Ok(OperatorNumber(fid, GreaterThan(pest_parse(value)?))),
} }
} }
@ -365,8 +350,6 @@ impl FacetCondition {
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => Ok(OperatorI64(fid, GreaterThanOrEqual(pest_parse(value)?))),
FacetType::Float => Ok(OperatorF64(fid, GreaterThanOrEqual(pest_parse(value)?))),
FacetType::String => { FacetType::String => {
Err(PestError::<Rule>::new_from_span( Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
@ -375,6 +358,7 @@ impl FacetCondition {
item_span, item_span,
).into()) ).into())
}, },
FacetType::Number => Ok(OperatorNumber(fid, GreaterThanOrEqual(pest_parse(value)?))),
} }
} }
@ -389,8 +373,6 @@ impl FacetCondition {
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => Ok(OperatorI64(fid, LowerThan(pest_parse(value)?))),
FacetType::Float => Ok(OperatorF64(fid, LowerThan(pest_parse(value)?))),
FacetType::String => { FacetType::String => {
Err(PestError::<Rule>::new_from_span( Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
@ -399,6 +381,7 @@ impl FacetCondition {
item_span, item_span,
).into()) ).into())
}, },
FacetType::Number => Ok(OperatorNumber(fid, LowerThan(pest_parse(value)?))),
} }
} }
@ -413,8 +396,6 @@ impl FacetCondition {
let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
match ftype { match ftype {
FacetType::Integer => Ok(OperatorI64(fid, LowerThanOrEqual(pest_parse(value)?))),
FacetType::Float => Ok(OperatorF64(fid, LowerThanOrEqual(pest_parse(value)?))),
FacetType::String => { FacetType::String => {
Err(PestError::<Rule>::new_from_span( Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { ErrorVariant::CustomError {
@ -423,6 +404,7 @@ impl FacetCondition {
item_span, item_span,
).into()) ).into())
}, },
FacetType::Number => Ok(OperatorNumber(fid, LowerThanOrEqual(pest_parse(value)?))),
} }
} }
} }
@ -430,24 +412,20 @@ impl FacetCondition {
impl FacetCondition { impl FacetCondition {
/// Aggregates the documents ids that are part of the specified range automatically /// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels. /// going deeper through the levels.
fn explore_facet_levels<'t, T: 't, KC>( fn explore_facet_number_levels(
rtxn: &'t heed::RoTxn, rtxn: &heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
level: u8, level: u8,
left: Bound<T>, left: Bound<f64>,
right: Bound<T>, right: Bound<f64>,
output: &mut RoaringBitmap, output: &mut RoaringBitmap,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
match (left, right) { match (left, right) {
// If the request is an exact value we must go directly to the deepest level. // If the request is an exact value we must go directly to the deepest level.
(Included(l), Included(r)) if l == r && level > 0 => { (Included(l), Included(r)) if l == r && level > 0 => {
return Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, 0, left, right, output); return Self::explore_facet_number_levels(rtxn, db, field_id, 0, left, right, output);
}, },
// lower TO upper when lower > upper must return no result // lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()), (Included(l), Included(r)) if l > r => return Ok(()),
@ -462,7 +440,7 @@ impl FacetCondition {
// We must create a custom iterator to be able to iterate over the // We must create a custom iterator to be able to iterate over the
// requested range as the range iterator cannot express some conditions. // requested range as the range iterator cannot express some conditions.
let iter = FacetRange::new(rtxn, db.remap_key_type::<KC>(), field_id, level, left, right)?; let iter = FacetRange::new(rtxn, db, field_id, level, left, right)?;
debug!("Iterating between {:?} and {:?} (level {})", left, right, level); debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
@ -489,64 +467,60 @@ impl FacetCondition {
if !matches!(left, Included(l) if l == left_found) { if !matches!(left, Included(l) if l == left_found) {
let sub_right = Excluded(left_found); let sub_right = Excluded(left_found);
debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level); debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level);
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, sub_right, output)?; Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, sub_right, output)?;
} }
if !matches!(right, Included(r) if r == right_found) { if !matches!(right, Included(r) if r == right_found) {
let sub_left = Excluded(right_found); let sub_left = Excluded(right_found);
debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level); debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level);
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, sub_left, right, output)?; Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, sub_left, right, output)?;
} }
}, },
None => { None => {
// If we found nothing at this level it means that we must find // If we found nothing at this level it means that we must find
// the same bounds but at a deeper, more precise level. // the same bounds but at a deeper, more precise level.
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, deeper_level, left, right, output)?; Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, right, output)?;
}, },
} }
Ok(()) Ok(())
} }
fn evaluate_number_operator<'t, T: 't, KC>( fn evaluate_number_operator<>(
rtxn: &'t heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
operator: FacetNumberOperator<T>, operator: FacetNumberOperator,
) -> anyhow::Result<RoaringBitmap> ) -> anyhow::Result<RoaringBitmap>
where
T: Copy + PartialEq + PartialOrd + Bounded + Debug,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
// Make sure we always bound the ranges with the field id and the level, // Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the // as the facets values are all in the same database and prefixed by the
// field id and the level. // field id and the level.
let (left, right) = match operator { let (left, right) = match operator {
GreaterThan(val) => (Excluded(val), Included(T::max_value())), GreaterThan(val) => (Excluded(val), Included(f64::MAX)),
GreaterThanOrEqual(val) => (Included(val), Included(T::max_value())), GreaterThanOrEqual(val) => (Included(val), Included(f64::MAX)),
Equal(val) => (Included(val), Included(val)), Equal(val) => (Included(val), Included(val)),
NotEqual(val) => { NotEqual(val) => {
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?; let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
let docids = Self::evaluate_number_operator::<T, KC>(rtxn, index, db, field_id, Equal(val))?; let docids = Self::evaluate_number_operator(rtxn, index, db, field_id, Equal(val))?;
return Ok(all_documents_ids - docids); return Ok(all_documents_ids - docids);
}, },
LowerThan(val) => (Included(T::min_value()), Excluded(val)), LowerThan(val) => (Included(f64::MIN), Excluded(val)),
LowerThanOrEqual(val) => (Included(T::min_value()), Included(val)), LowerThanOrEqual(val) => (Included(f64::MIN), Included(val)),
Between(left, right) => (Included(left), Included(right)), Between(left, right) => (Included(left), Included(right)),
}; };
// Ask for the biggest value that can exist for this specific field, if it exists // Ask for the biggest value that can exist for this specific field, if it exists
// that's fine if it don't, the value just before will be returned instead. // that's fine if it don't, the value just before will be returned instead.
let biggest_level = db let biggest_level = db
.remap_types::<KC, DecodeIgnore>() .remap_data_type::<DecodeIgnore>()
.get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, T::max_value(), T::max_value()))? .get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, f64::MAX, f64::MAX))?
.and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None }); .and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None });
match biggest_level { match biggest_level {
Some(level) => { Some(level) => {
let mut output = RoaringBitmap::new(); let mut output = RoaringBitmap::new();
Self::explore_facet_levels::<T, KC>(rtxn, db, field_id, level, left, right, &mut output)?; Self::explore_facet_number_levels(rtxn, db, field_id, level, left, right, &mut output)?;
Ok(output) Ok(output)
}, },
None => Ok(RoaringBitmap::new()), None => Ok(RoaringBitmap::new()),
@ -585,16 +559,14 @@ impl FacetCondition {
{ {
let db = index.facet_field_id_value_docids; let db = index.facet_field_id_value_docids;
match self { match self {
OperatorI64(fid, op) => {
Self::evaluate_number_operator::<i64, FacetLevelValueI64Codec>(rtxn, index, db, *fid, *op)
},
OperatorF64(fid, op) => {
Self::evaluate_number_operator::<f64, FacetLevelValueF64Codec>(rtxn, index, db, *fid, *op)
},
OperatorString(fid, op) => { OperatorString(fid, op) => {
let db = db.remap_key_type::<FacetValueStringCodec>(); let db = db.remap_key_type::<FacetValueStringCodec>();
Self::evaluate_string_operator(rtxn, index, db, *fid, op) Self::evaluate_string_operator(rtxn, index, db, *fid, op)
}, },
OperatorNumber(fid, op) => {
let db = db.remap_key_type::<FacetLevelValueF64Codec>();
Self::evaluate_number_operator(rtxn, index, db, *fid, *op)
},
Or(lhs, rhs) => { Or(lhs, rhs) => {
let lhs = lhs.evaluate(rtxn, index)?; let lhs = lhs.evaluate(rtxn, index)?;
let rhs = rhs.evaluate(rtxn, index)?; let rhs = rhs.evaluate(rtxn, index)?;
@ -646,7 +618,7 @@ mod tests {
} }
#[test] #[test]
fn i64() { fn number() {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
@ -655,20 +627,20 @@ mod tests {
// Set the faceted fields to be the channel. // Set the faceted fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashmap!{ "timestamp".into() => "integer".into() }); builder.set_faceted_fields(hashmap!{ "timestamp".into() => "number".into() });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
let expected = OperatorI64(0, Between(22, 44)); let expected = OperatorNumber(0, Between(22.0, 44.0));
assert_eq!(condition, expected); assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
let expected = Or( let expected = Or(
Box::new(OperatorI64(0, LowerThan(22))), Box::new(OperatorNumber(0, LowerThan(22.0))),
Box::new(OperatorI64(0, GreaterThan(44))), Box::new(OperatorNumber(0, GreaterThan(44.0))),
); );
assert_eq!(condition, expected); assert_eq!(condition, expected);
} }
@ -686,7 +658,7 @@ mod tests {
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
builder.set_faceted_fields(hashmap!{ builder.set_faceted_fields(hashmap!{
"channel".into() => "string".into(), "channel".into() => "string".into(),
"timestamp".into() => "integer".into(), "timestamp".into() => "number".into(),
}); });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -700,7 +672,7 @@ mod tests {
let expected = Or( let expected = Or(
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))), Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
Box::new(And( Box::new(And(
Box::new(OperatorI64(1, Between(22, 44))), Box::new(OperatorNumber(1, Between(22.0, 44.0))),
Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))), Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))),
)) ))
); );
@ -714,8 +686,8 @@ mod tests {
Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))), Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))),
Box::new(Or( Box::new(Or(
Box::new(Or( Box::new(Or(
Box::new(OperatorI64(1, LowerThan(22))), Box::new(OperatorNumber(1, LowerThan(22.0))),
Box::new(OperatorI64(1, GreaterThan(44))), Box::new(OperatorNumber(1, GreaterThan(44.0))),
)), )),
Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))), Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))),
)), )),
@ -736,7 +708,7 @@ mod tests {
builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order
builder.set_faceted_fields(hashmap!{ builder.set_faceted_fields(hashmap!{
"channel".into() => "string".into(), "channel".into() => "string".into(),
"timestamp".into() => "integer".into(), "timestamp".into() => "number".into(),
}); });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();

View File

@ -7,8 +7,8 @@ use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::{FacetType, FacetValue}; use crate::facet::{FacetType, FacetValue};
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use crate::search::facet::{FacetIter, FacetRange}; use crate::search::facet::{FacetIter, FacetRange};
use crate::{Index, FieldId, DocumentId}; use crate::{Index, FieldId, DocumentId};
@ -102,12 +102,9 @@ impl<'a> FacetDistribution<'a> {
FacetType::String => { FacetType::String => {
fetch_facet_values::<FieldDocIdFacetStringCodec, _>(index, rtxn, field_id, candidates) fetch_facet_values::<FieldDocIdFacetStringCodec, _>(index, rtxn, field_id, candidates)
}, },
FacetType::Float => { FacetType::Number => {
fetch_facet_values::<FieldDocIdFacetF64Codec, _>(index, rtxn, field_id, candidates) fetch_facet_values::<FieldDocIdFacetF64Codec, _>(index, rtxn, field_id, candidates)
}, },
FacetType::Integer => {
fetch_facet_values::<FieldDocIdFacetI64Codec, _>(index, rtxn, field_id, candidates)
},
} }
} }
@ -122,18 +119,11 @@ impl<'a> FacetDistribution<'a> {
{ {
let iter = match facet_type { let iter = match facet_type {
FacetType::String => unreachable!(), FacetType::String => unreachable!(),
FacetType::Float => { FacetType::Number => {
let iter = FacetIter::<f64, FacetLevelValueF64Codec>::new_non_reducing( let iter = FacetIter::new_non_reducing(
self.rtxn, self.index, field_id, candidates.clone(), self.rtxn, self.index, field_id, candidates.clone(),
)?; )?;
let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))); iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)))
Box::new(iter) as Box::<dyn Iterator<Item=_>>
},
FacetType::Integer => {
let iter = FacetIter::<i64, FacetLevelValueI64Codec>::new_non_reducing(
self.rtxn, self.index, field_id, candidates.clone(),
)?;
Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))))
}, },
}; };
@ -170,16 +160,9 @@ impl<'a> FacetDistribution<'a> {
.map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids)));
Box::new(iter) as Box::<dyn Iterator<Item=_>> Box::new(iter) as Box::<dyn Iterator<Item=_>>
}, },
FacetType::Float => { FacetType::Number => {
let db = db.remap_key_type::<FacetLevelValueF64Codec>(); let db = db.remap_key_type::<FacetLevelValueF64Codec>();
let range = FacetRange::<f64, _>::new( let range = FacetRange::new(
self.rtxn, db, field_id, level, Unbounded, Unbounded,
)?;
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))
},
FacetType::Integer => {
let db = db.remap_key_type::<FacetLevelValueI64Codec>();
let range = FacetRange::<i64, _>::new(
self.rtxn, db, field_id, level, Unbounded, Unbounded, self.rtxn, db, field_id, level, Unbounded, Unbounded,
)?; )?;
Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids))))

View File

@ -1,15 +1,12 @@
use std::fmt::Debug;
use std::ops::Bound::{self, Included, Excluded, Unbounded}; use std::ops::Bound::{self, Included, Excluded, Unbounded};
use either::Either::{self, Left, Right}; use either::Either::{self, Left, Right};
use heed::types::{DecodeIgnore, ByteSlice}; use heed::types::{DecodeIgnore, ByteSlice};
use heed::{BytesEncode, BytesDecode};
use heed::{Database, RoRange, RoRevRange, LazyDecode}; use heed::{Database, RoRange, RoRevRange, LazyDecode};
use log::debug;
use num_traits::Bounded;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::{Index, FieldId}; use crate::{Index, FieldId};
pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator}; pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator};
@ -19,43 +16,34 @@ mod facet_condition;
mod facet_distribution; mod facet_distribution;
mod parser; mod parser;
pub struct FacetRange<'t, T: 't, KC> { pub struct FacetRange<'t> {
iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
end: Bound<T>, end: Bound<f64>,
} }
impl<'t, T: 't, KC> FacetRange<'t, T, KC> impl<'t> FacetRange<'t> {
where
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded,
{
pub fn new( pub fn new(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
level: u8, level: u8,
left: Bound<T>, left: Bound<f64>,
right: Bound<T>, right: Bound<f64>,
) -> heed::Result<FacetRange<'t, T, KC>> ) -> heed::Result<FacetRange<'t>>
{ {
let left_bound = match left { let left_bound = match left {
Included(left) => Included((field_id, level, left, T::min_value())), Included(left) => Included((field_id, level, left, f64::MIN)),
Excluded(left) => Excluded((field_id, level, left, T::min_value())), Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
Unbounded => Included((field_id, level, T::min_value(), T::min_value())), Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
}; };
let right_bound = Included((field_id, level, T::max_value(), T::max_value())); let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?; let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
Ok(FacetRange { iter, end: right }) Ok(FacetRange { iter, end: right })
} }
} }
impl<'t, T, KC> Iterator for FacetRange<'t, T, KC> impl<'t> Iterator for FacetRange<'t> {
where type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy,
{
type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
match self.iter.next() { match self.iter.next() {
@ -80,43 +68,34 @@ where
} }
} }
pub struct FacetRevRange<'t, T: 't, KC> { pub struct FacetRevRange<'t> {
iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
end: Bound<T>, end: Bound<f64>,
} }
impl<'t, T: 't, KC> FacetRevRange<'t, T, KC> impl<'t> FacetRevRange<'t> {
where
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded,
{
pub fn new( pub fn new(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
level: u8, level: u8,
left: Bound<T>, left: Bound<f64>,
right: Bound<T>, right: Bound<f64>,
) -> heed::Result<FacetRevRange<'t, T, KC>> ) -> heed::Result<FacetRevRange<'t>>
{ {
let left_bound = match left { let left_bound = match left {
Included(left) => Included((field_id, level, left, T::min_value())), Included(left) => Included((field_id, level, left, f64::MIN)),
Excluded(left) => Excluded((field_id, level, left, T::min_value())), Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
Unbounded => Included((field_id, level, T::min_value(), T::min_value())), Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
}; };
let right_bound = Included((field_id, level, T::max_value(), T::max_value())); let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
Ok(FacetRevRange { iter, end: right }) Ok(FacetRevRange { iter, end: right })
} }
} }
impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC> impl<'t> Iterator for FacetRevRange<'t> {
where type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy,
{
type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { loop {
@ -142,20 +121,15 @@ where
} }
} }
pub struct FacetIter<'t, T: 't, KC> { pub struct FacetIter<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: Database<KC, CboRoaringBitmapCodec>, db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>, level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t>, FacetRevRange<'t>>)>,
must_reduce: bool, must_reduce: bool,
} }
impl<'t, T, KC> FacetIter<'t, T, KC> impl<'t> FacetIter<'t> {
where
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded,
{
/// Create a `FacetIter` that will iterate on the different facet entries /// Create a `FacetIter` that will iterate on the different facet entries
/// (facet value + documents ids) and that will reduce the given documents ids /// (facet value + documents ids) and that will reduce the given documents ids
/// while iterating on the different facet levels. /// while iterating on the different facet levels.
@ -164,9 +138,9 @@ where
index: &'t Index, index: &'t Index,
field_id: FieldId, field_id: FieldId,
documents_ids: RoaringBitmap, documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t, T, KC>> ) -> heed::Result<FacetIter<'t>>
{ {
let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); let db = index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Left(highest_iter))]; let level_iters = vec![(documents_ids, Left(highest_iter))];
@ -181,9 +155,9 @@ where
index: &'t Index, index: &'t Index,
field_id: FieldId, field_id: FieldId,
documents_ids: RoaringBitmap, documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t, T, KC>> ) -> heed::Result<FacetIter<'t>>
{ {
let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); let db = index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Right(highest_iter))]; let level_iters = vec![(documents_ids, Right(highest_iter))];
@ -199,32 +173,32 @@ where
index: &'t Index, index: &'t Index,
field_id: FieldId, field_id: FieldId,
documents_ids: RoaringBitmap, documents_ids: RoaringBitmap,
) -> heed::Result<FacetIter<'t, T, KC>> ) -> heed::Result<FacetIter<'t>>
{ {
let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); let db = index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>();
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
let level_iters = vec![(documents_ids, Left(highest_iter))]; let level_iters = vec![(documents_ids, Left(highest_iter))];
Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false }) Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false })
} }
fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> { fn highest_level<X>(
rtxn: &'t heed::RoTxn,
db: Database<FacetLevelValueF64Codec, X>,
fid: FieldId,
) -> heed::Result<Option<u8>>
{
let level = db.remap_types::<ByteSlice, DecodeIgnore>() let level = db.remap_types::<ByteSlice, DecodeIgnore>()
.prefix_iter(rtxn, &[fid][..])? .prefix_iter(rtxn, &[fid][..])?
.remap_key_type::<KC>() .remap_key_type::<FacetLevelValueF64Codec>()
.last().transpose()? .last().transpose()?
.map(|((_, level, _, _), _)| level); .map(|((_, level, _, _), _)| level);
Ok(level) Ok(level)
} }
} }
impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC> impl<'t> Iterator for FacetIter<'t> {
where type Item = heed::Result<(f64, RoaringBitmap)>;
KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>,
T: PartialOrd + Copy + Bounded + Debug,
{
type Item = heed::Result<(T, RoaringBitmap)>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
'outer: loop { 'outer: loop {
@ -248,7 +222,6 @@ where
} }
if level == 0 { if level == 0 {
debug!("found {:?} at {:?}", docids, left);
return Some(Ok((left, docids))); return Some(Ok((left, docids)));
} }
@ -258,10 +231,6 @@ where
let left = Included(left); let left = Included(left);
let right = Included(right); let right = Included(right);
debug!("calling with {:?} to {:?} (level {}) to find {:?}",
left, right, level - 1, docids,
);
let result = if is_ascending { let result = if is_ascending {
FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left) FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left)
} else { } else {

View File

@ -10,7 +10,7 @@ use serde_json::Value;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds}; use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use super::ClearDocuments; use super::ClearDocuments;
pub struct DeleteDocuments<'t, 'u, 'i> { pub struct DeleteDocuments<'t, 'u, 'i> {
@ -302,7 +302,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
} }
} }
}, },
FacetType::Float => { FacetType::Number => {
let mut iter = iter.remap_key_type::<FieldDocIdFacetF64Codec>(); let mut iter = iter.remap_key_type::<FieldDocIdFacetF64Codec>();
while let Some(result) = iter.next() { while let Some(result) = iter.next() {
let ((_fid, docid, _value), ()) = result?; let ((_fid, docid, _value), ()) = result?;
@ -311,15 +311,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
} }
} }
}, },
FacetType::Integer => {
let mut iter = iter.remap_key_type::<FieldDocIdFacetI64Codec>();
while let Some(result) = iter.next() {
let ((_fid, docid, _value), ()) = result?;
if self.documents_ids.contains(docid) {
iter.del_current()?;
}
}
},
} }
} }

View File

@ -7,12 +7,11 @@ use grenad::{CompressionType, Reader, Writer, FileFuse};
use heed::types::{ByteSlice, DecodeIgnore}; use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesEncode, Error}; use heed::{BytesEncode, Error};
use log::debug; use log::debug;
use num_traits::{Bounded, Zero};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::Index; use crate::Index;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
@ -65,58 +64,6 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
debug!("Computing and writing the facet values levels docids into LMDB on disk..."); debug!("Computing and writing the facet values levels docids into LMDB on disk...");
for (field_id, facet_type) in faceted_fields { for (field_id, facet_type) in faceted_fields {
let (content, documents_ids) = match facet_type { let (content, documents_ids) = match facet_type {
FacetType::Integer => {
clear_field_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
self.chunk_compression_level,
self.chunk_fusing_shrink_size,
self.level_group_size,
self.min_level_size,
field_id,
)?;
(Some(content), documents_ids)
},
FacetType::Float => {
clear_field_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
self.chunk_compression_level,
self.chunk_fusing_shrink_size,
self.level_group_size,
self.min_level_size,
field_id,
)?;
(Some(content), documents_ids)
},
FacetType::String => { FacetType::String => {
let documents_ids = compute_faceted_documents_ids( let documents_ids = compute_faceted_documents_ids(
self.wtxn, self.wtxn,
@ -126,6 +73,32 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
(None, documents_ids) (None, documents_ids)
}, },
FacetType::Number => {
clear_field_number_levels(
self.wtxn,
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
field_id,
)?;
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_number_levels(
self.wtxn,
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
self.chunk_compression_type,
self.chunk_compression_level,
self.chunk_fusing_shrink_size,
self.level_group_size,
self.min_level_size,
field_id,
)?;
(Some(content), documents_ids)
},
}; };
if let Some(content) = content { if let Some(content) = content {
@ -145,25 +118,21 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
} }
} }
fn clear_field_levels<'t, T: 't, KC>( fn clear_field_number_levels<'t, >(
wtxn: &'t mut heed::RwTxn, wtxn: &'t mut heed::RwTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
field_id: u8, field_id: u8,
) -> heed::Result<()> ) -> heed::Result<()>
where
T: Copy + Bounded,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let left = (field_id, 1, T::min_value(), T::min_value()); let left = (field_id, 1, f64::MIN, f64::MIN);
let right = (field_id, u8::MAX, T::max_value(), T::max_value()); let right = (field_id, u8::MAX, f64::MAX, f64::MAX);
let range = left..=right; let range = left..=right;
db.remap_key_type::<KC>().delete_range(wtxn, &range).map(drop) db.delete_range(wtxn, &range).map(drop)
} }
fn compute_facet_levels<'t, T: 't, KC>( fn compute_facet_number_levels<'t>(
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
compression_type: CompressionType, compression_type: CompressionType,
compression_level: Option<u32>, compression_level: Option<u32>,
shrink_size: Option<u64>, shrink_size: Option<u64>,
@ -171,12 +140,10 @@ fn compute_facet_levels<'t, T: 't, KC>(
min_level_size: NonZeroUsize, min_level_size: NonZeroUsize,
field_id: u8, field_id: u8,
) -> anyhow::Result<Reader<FileFuse>> ) -> anyhow::Result<Reader<FileFuse>>
where
T: Copy + PartialEq + PartialOrd + Bounded + Zero,
KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>,
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let first_level_size = db.prefix_iter(rtxn, &[field_id])? let first_level_size = db
.remap_key_type::<ByteSlice>()
.prefix_iter(rtxn, &[field_id])?
.remap_types::<DecodeIgnore, DecodeIgnore>() .remap_types::<DecodeIgnore, DecodeIgnore>()
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?; .fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
@ -187,8 +154,8 @@ where
})?; })?;
let level_0_range = { let level_0_range = {
let left = (field_id, 0, T::min_value(), T::min_value()); let left = (field_id, 0, f64::MIN, f64::MIN);
let right = (field_id, 0, T::max_value(), T::max_value()); let right = (field_id, 0, f64::MAX, f64::MAX);
left..=right left..=right
}; };
@ -199,11 +166,10 @@ where
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get()); .take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
for (level, group_size) in group_size_iter { for (level, group_size) in group_size_iter {
let mut left = T::zero(); let mut left = 0.0;
let mut right = T::zero(); let mut right = 0.0;
let mut group_docids = RoaringBitmap::new(); let mut group_docids = RoaringBitmap::new();
let db = db.remap_key_type::<KC>();
for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() { for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() {
let ((_field_id, _level, value, _right), docids) = result?; let ((_field_id, _level, value, _right), docids) = result?;
@ -212,7 +178,7 @@ where
} else if i % group_size == 0 { } else if i % group_size == 0 {
// we found the first bound of the next group, we must store the left // we found the first bound of the next group, we must store the left
// and right bounds associated with the docids. // and right bounds associated with the docids.
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?; write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?;
// We save the left bound for the new group and also reset the docids. // We save the left bound for the new group and also reset the docids.
group_docids = RoaringBitmap::new(); group_docids = RoaringBitmap::new();
@ -225,7 +191,7 @@ where
} }
if !group_docids.is_empty() { if !group_docids.is_empty() {
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?; write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?;
} }
} }
@ -246,19 +212,17 @@ fn compute_faceted_documents_ids(
Ok(documents_ids) Ok(documents_ids)
} }
fn write_entry<T, KC>( fn write_number_entry(
writer: &mut Writer<File>, writer: &mut Writer<File>,
field_id: u8, field_id: u8,
level: u8, level: u8,
left: T, left: f64,
right: T, right: f64,
ids: &RoaringBitmap, ids: &RoaringBitmap,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where
KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>,
{ {
let key = (field_id, level, left, right); let key = (field_id, level, left, right);
let key = KC::bytes_encode(&key).ok_or(Error::Encoding)?; let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?; let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?;
writer.insert(&key, &data)?; writer.insert(&key, &data)?;
Ok(()) Ok(())

View File

@ -440,6 +440,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
.enumerate() .enumerate()
.map(|(i, documents)| { .map(|(i, documents)| {
let store = Store::new( let store = Store::new(
primary_key.clone(),
fields_ids_map.clone(),
searchable_fields.clone(), searchable_fields.clone(),
faceted_fields.clone(), faceted_fields.clone(),
linked_hash_map_size, linked_hash_map_size,

View File

@ -12,19 +12,19 @@ use fst::Set;
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
use heed::BytesEncode; use heed::BytesEncode;
use linked_hash_map::LinkedHashMap; use linked_hash_map::LinkedHashMap;
use log::{debug, info}; use log::{debug, info, warn};
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind};
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde_json::Value; use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
use crate::facet::FacetType; use crate::facet::{FacetType, FacetValue};
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::update::UpdateIndexingStep; use crate::update::UpdateIndexingStep;
use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId}; use crate::{json_to_string, SmallVec8, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap};
use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
use super::merge_function::{ use super::merge_function::{
@ -50,6 +50,8 @@ pub struct Readers {
pub struct Store<'s, A> { pub struct Store<'s, A> {
// Indexing parameters // Indexing parameters
primary_key: String,
fields_ids_map: FieldsIdsMap,
searchable_fields: HashSet<FieldId>, searchable_fields: HashSet<FieldId>,
faceted_fields: HashMap<FieldId, FacetType>, faceted_fields: HashMap<FieldId, FacetType>,
// Caches // Caches
@ -78,6 +80,8 @@ pub struct Store<'s, A> {
impl<'s, A: AsRef<[u8]>> Store<'s, A> { impl<'s, A: AsRef<[u8]>> Store<'s, A> {
pub fn new( pub fn new(
primary_key: String,
fields_ids_map: FieldsIdsMap,
searchable_fields: HashSet<FieldId>, searchable_fields: HashSet<FieldId>,
faceted_fields: HashMap<FieldId, FacetType>, faceted_fields: HashMap<FieldId, FacetType>,
linked_hash_map_size: Option<usize>, linked_hash_map_size: Option<usize>,
@ -149,6 +153,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(Store { Ok(Store {
// Indexing parameters. // Indexing parameters.
primary_key,
fields_ids_map,
searchable_fields, searchable_fields,
faceted_fields, faceted_fields,
// Caches // Caches
@ -365,8 +371,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
for ((field_id, value), docids) in iter { for ((field_id, value), docids) in iter {
let result = match value { let result = match value {
String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned), String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned),
Float(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned), Number(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned),
Integer(i) => FacetLevelValueI64Codec::bytes_encode(&(field_id, 0, i, i)).map(Cow::into_owned),
}; };
let key = result.context("could not serialize facet key")?; let key = result.context("could not serialize facet key")?;
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
@ -390,8 +395,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
let result = match value { let result = match value {
String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned), String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned),
Float(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned), Number(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned),
Integer(i) => FieldDocIdFacetI64Codec::bytes_encode(&(field_id, document_id, *i)).map(Cow::into_owned),
}; };
let key = result.context("could not serialize facet key")?; let key = result.context("could not serialize facet key")?;
@ -464,9 +468,26 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
let value = serde_json::from_slice(content)?; let value = serde_json::from_slice(content)?;
if let Some(ftype) = self.faceted_fields.get(&attr) { if let Some(ftype) = self.faceted_fields.get(&attr) {
let mut values = parse_facet_value(*ftype, &value).with_context(|| { let mut values = match parse_facet_value(*ftype, &value) {
format!("extracting facets from the value {}", value) Ok(values) => values,
})?; Err(e) => {
// We extract the name of the attribute and the document id
// to help users debug a facet type conversion.
let attr_name = self.fields_ids_map.name(attr).unwrap();
let document_id: Value = self.fields_ids_map.id(&self.primary_key)
.and_then(|fid| document.get(fid))
.map(serde_json::from_slice)
.unwrap()?;
let context = format!(
"while extracting facet from the {:?} attribute in the {} document",
attr_name, document_id,
);
warn!("{}", e.context(context));
SmallVec8::default()
},
};
facet_values.entry(attr).or_insert_with(SmallVec8::new).extend(values.drain(..)); facet_values.entry(attr).or_insert_with(SmallVec8::new).extend(values.drain(..));
} }
@ -605,13 +626,6 @@ fn lmdb_key_valid_size(key: &[u8]) -> bool {
!key.is_empty() && key.len() <= LMDB_MAX_KEY_LENGTH !key.is_empty() && key.len() <= LMDB_MAX_KEY_LENGTH
} }
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum FacetValue {
String(SmallString32),
Float(OrderedFloat<f64>),
Integer(i64),
}
/// take an iterator on tokens and compute their relative position depending on separator kinds /// take an iterator on tokens and compute their relative position depending on separator kinds
/// if it's an `Hard` separator we add an additional relative proximity of 8 between words, /// if it's an `Hard` separator we add an additional relative proximity of 8 between words,
/// else we keep the standart proximity of 1 between words. /// else we keep the standart proximity of 1 between words.
@ -654,54 +668,40 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec
{ {
match value { match value {
Value::Null => Ok(()), Value::Null => Ok(()),
Value::Bool(b) => { Value::Bool(b) => match ftype {
output.push(Integer(*b as i64)); FacetType::String => {
Ok(()) output.push(String(b.to_string()));
Ok(())
},
FacetType::Number => {
output.push(Number(OrderedFloat(if *b { 1.0 } else { 0.0 })));
Ok(())
},
}, },
Value::Number(number) => match ftype { Value::Number(number) => match ftype {
FacetType::String => { FacetType::String => {
let string = SmallString32::from(number.to_string()); output.push(String(number.to_string()));
output.push(String(string));
Ok(()) Ok(())
}, },
FacetType::Float => match number.as_f64() { FacetType::Number => match number.as_f64() {
Some(float) => { Some(float) => {
output.push(Float(OrderedFloat(float))); output.push(Number(OrderedFloat(float)));
Ok(()) Ok(())
}, },
None => bail!("invalid facet type, expecting {} found integer", ftype), None => bail!("invalid facet type, expecting {} found number", ftype),
},
FacetType::Integer => match number.as_i64() {
Some(integer) => {
output.push(Integer(integer));
Ok(())
},
None => if number.is_f64() {
bail!("invalid facet type, expecting {} found float", ftype)
} else {
bail!("invalid facet type, expecting {} found out-of-bound integer (64bit)", ftype)
},
}, },
}, },
Value::String(string) => { Value::String(string) => {
// TODO must be normalized and not only lowercased.
let string = string.trim().to_lowercase(); let string = string.trim().to_lowercase();
if string.is_empty() { return Ok(()) }
match ftype { match ftype {
FacetType::String => { FacetType::String => {
let string = SmallString32::from(string);
output.push(String(string)); output.push(String(string));
Ok(()) Ok(())
}, },
FacetType::Float => match string.parse() { FacetType::Number => match string.parse() {
Ok(float) => { Ok(float) => {
output.push(Float(OrderedFloat(float))); output.push(Number(OrderedFloat(float)));
Ok(())
},
Err(_err) => bail!("invalid facet type, expecting {} found string", ftype),
},
FacetType::Integer => match string.parse() {
Ok(integer) => {
output.push(Integer(integer));
Ok(()) Ok(())
}, },
Err(_err) => bail!("invalid facet type, expecting {} found string", ftype), Err(_err) => bail!("invalid facet type, expecting {} found string", ftype),
@ -711,7 +711,10 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec
Value::Array(values) => if can_recurse { Value::Array(values) => if can_recurse {
values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect() values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect()
} else { } else {
bail!("invalid facet type, expecting {} found sub-array ()", ftype) bail!(
"invalid facet type, expecting {} found array (recursive arrays are not supported)",
ftype,
);
}, },
Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype), Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype),
} }

View File

@ -619,7 +619,7 @@ mod tests {
// Set the faceted fields to be the age. // Set the faceted fields to be the age.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashmap! { "age".into() => "integer".into() }); builder.set_faceted_fields(hashmap!{ "age".into() => "number".into() });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
// Then index some documents. // Then index some documents.
@ -632,7 +632,7 @@ mod tests {
// Check that the displayed fields are correctly set. // Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let fields_ids = index.faceted_fields(&rtxn).unwrap(); let fields_ids = index.faceted_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashmap! { "age".to_string() => FacetType::Integer }); assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Number });
// Only count the field_id 0 and level 0 facet values. // Only count the field_id 0 and level 0 facet values.
let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count();
assert_eq!(count, 3); assert_eq!(count, 3);
@ -812,9 +812,9 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_displayed_fields(vec!["hello".to_string()]); builder.set_displayed_fields(vec!["hello".to_string()]);
builder.set_faceted_fields(hashmap! { builder.set_faceted_fields(hashmap!{
"age".into() => "integer".into(), "age".into() => "number".into(),
"toto".into() => "integer".into(), "toto".into() => "number".into(),
}); });
builder.set_criteria(vec!["asc(toto)".to_string()]); builder.set_criteria(vec!["asc(toto)".to_string()]);
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();