diff --git a/Cargo.lock b/Cargo.lock index d28745d8b..bbe86a2a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1272,7 +1272,6 @@ dependencies = [ "maplit", "meilisearch-tokenizer", "memmap", - "num-traits", "obkv", "once_cell", "ordered-float", diff --git a/infos/src/main.rs b/infos/src/main.rs index 376679656..cc1727a68 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -274,15 +274,12 @@ fn facet_values_iter<'txn, DC: 'txn, T>( facet_type: milli::facet::FacetType, string_fn: impl Fn(&str) -> T + 'txn, float_fn: impl Fn(u8, f64, f64) -> T + 'txn, - integer_fn: impl Fn(u8, i64, i64) -> T + 'txn, ) -> heed::Result> + 'txn>> where DC: heed::BytesDecode<'txn>, { use milli::facet::FacetType; - use milli::heed_codec::facet::{ - FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec, - }; + use milli::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; let iter = db.prefix_iter(&rtxn, &[field_id])?; match facet_type { @@ -291,20 +288,13 @@ where .map(move |r| r.map(|((_, key), value)| (string_fn(key), value))); Ok(Box::new(iter) as Box>) }, - FacetType::Float => { + FacetType::Number => { let iter = iter.remap_key_type::() .map(move |r| r.map(|((_, level, left, right), value)| { (float_fn(level, left, right), value) })); Ok(Box::new(iter)) }, - FacetType::Integer => { - let iter = iter.remap_key_type::() - .map(move |r| r.map(|((_, level, left, right), value)| { - (integer_fn(level, left, right), value) - })); - Ok(Box::new(iter)) - }, } } @@ -413,11 +403,6 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho let _ = write!(&mut output, " (level {})", level); output }, - |level, left, right| { - let mut output = facet_number_value_to_string(level, left, right).1; - let _ = write!(&mut output, " (level {})", level); - output - }, )?; for result in iter { @@ -523,7 +508,6 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam *field_type, |key| (0, key.to_owned()), facet_number_value_to_string, - facet_number_value_to_string, )?; for result in iter { @@ -590,7 +574,6 @@ fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow: *field_type, |_key| 0u8, |level, _left, _right| level, - |level, _left, _right| level, )?; println!("The database {:?} facet stats", field_name); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index ffbaacc1c..b198131c1 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -22,7 +22,6 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } linked-hash-map = "0.5.4" meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.1" } memmap = "0.7.0" -num-traits = "0.2.14" obkv = "0.1.1" once_cell = "1.5.2" ordered-float = "2.1.1" diff --git a/milli/src/facet/facet_type.rs b/milli/src/facet/facet_type.rs index 4fdc80798..09f29bc00 100644 --- a/milli/src/facet/facet_type.rs +++ b/milli/src/facet/facet_type.rs @@ -8,16 +8,14 @@ use serde::{Serialize, Deserialize}; #[derive(Serialize, Deserialize)] pub enum FacetType { String, - Float, - Integer, + Number, } impl fmt::Display for FacetType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { FacetType::String => f.write_str("string"), - FacetType::Float => f.write_str("float"), - FacetType::Integer => f.write_str("integer"), + FacetType::Number => f.write_str("number"), } } } @@ -26,12 +24,10 @@ impl FromStr for FacetType { type Err = InvalidFacetType; fn from_str(s: &str) -> Result { - if s.eq_ignore_ascii_case("string") { + if s.trim().eq_ignore_ascii_case("string") { Ok(FacetType::String) - } else if s.eq_ignore_ascii_case("float") { - Ok(FacetType::Float) - } else if s.eq_ignore_ascii_case("integer") { - Ok(FacetType::Integer) + } else if s.trim().eq_ignore_ascii_case("number") { + Ok(FacetType::Number) } else { Err(InvalidFacetType) } @@ -43,7 +39,7 @@ pub struct InvalidFacetType; impl fmt::Display for InvalidFacetType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(r#"Invalid facet type, must be "string", "float" or "integer""#) + f.write_str(r#"Invalid facet type, must be "string" or "number""#) } } diff --git a/milli/src/facet/facet_value.rs b/milli/src/facet/facet_value.rs index f311ca3dd..99455fa27 100644 --- a/milli/src/facet/facet_value.rs +++ b/milli/src/facet/facet_value.rs @@ -4,8 +4,7 @@ use serde::{Serialize, Serializer}; #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] pub enum FacetValue { String(String), - Float(OrderedFloat), - Integer(i64), + Number(OrderedFloat), } impl From for FacetValue { @@ -22,24 +21,25 @@ impl From<&str> for FacetValue { impl From for FacetValue { fn from(float: f64) -> FacetValue { - FacetValue::Float(OrderedFloat(float)) + FacetValue::Number(OrderedFloat(float)) } } impl From> for FacetValue { fn from(float: OrderedFloat) -> FacetValue { - FacetValue::Float(float) + FacetValue::Number(float) } } impl From for FacetValue { fn from(integer: i64) -> FacetValue { - FacetValue::Integer(integer) + FacetValue::Number(OrderedFloat(integer as f64)) } } /// We implement Serialize ourselves because we need to always serialize it as a string, /// JSON object keys must be strings not numbers. +// TODO remove this impl and convert them into string, by hand, when required. impl Serialize for FacetValue { fn serialize(&self, serializer: S) -> Result where @@ -47,12 +47,8 @@ impl Serialize for FacetValue { { match self { FacetValue::String(string) => serializer.serialize_str(string), - FacetValue::Float(float) => { - let string = float.to_string(); - serializer.serialize_str(&string) - }, - FacetValue::Integer(integer) => { - let string = integer.to_string(); + FacetValue::Number(number) => { + let string = number.to_string(); serializer.serialize_str(&string) }, } diff --git a/milli/src/facet/value_encoding.rs b/milli/src/facet/value_encoding.rs index 3cb012a0e..7259243e5 100644 --- a/milli/src/facet/value_encoding.rs +++ b/milli/src/facet/value_encoding.rs @@ -13,16 +13,6 @@ pub fn f64_into_bytes(float: f64) -> Option<[u8; 8]> { None } -#[inline] -pub fn i64_into_bytes(int: i64) -> [u8; 8] { - xor_first_bit(int.to_be_bytes()) -} - -#[inline] -pub fn i64_from_bytes(bytes: [u8; 8]) -> i64 { - i64::from_be_bytes(xor_first_bit(bytes)) -} - #[inline] fn xor_first_bit(mut x: [u8; 8]) -> [u8; 8] { x[0] ^= 0x80; @@ -55,15 +45,4 @@ mod tests { let vec: Vec<_> = [a, b, c, d, e].iter().cloned().map(f64_into_bytes).collect(); assert!(is_sorted(&vec), "{:?}", vec); } - - #[test] - fn ordered_i64_bytes() { - let a = -10_i64; - let b = -0_i64; - let c = 1_i64; - let d = 43_i64; - - let vec: Vec<_> = [a, b, c, d].iter().cloned().map(i64_into_bytes).collect(); - assert!(is_sorted(&vec), "{:?}", vec); - } } diff --git a/milli/src/heed_codec/facet/facet_level_value_i64_codec.rs b/milli/src/heed_codec/facet/facet_level_value_i64_codec.rs deleted file mode 100644 index cc0d3120d..000000000 --- a/milli/src/heed_codec/facet/facet_level_value_i64_codec.rs +++ /dev/null @@ -1,44 +0,0 @@ -use std::borrow::Cow; -use std::convert::TryInto; - -use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes}; -use crate::FieldId; - -pub struct FacetLevelValueI64Codec; - -impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec { - type DItem = (FieldId, u8, i64, i64); - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id, bytes) = bytes.split_first()?; - let (level, bytes) = bytes.split_first()?; - - let left = bytes[..8].try_into().map(i64_from_bytes).ok()?; - let right = if *level != 0 { - bytes[8..].try_into().map(i64_from_bytes).ok()? - } else { - left - }; - - Some((*field_id, *level, left, right)) - } -} - -impl heed::BytesEncode<'_> for FacetLevelValueI64Codec { - type EItem = (FieldId, u8, i64, i64); - - fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option> { - let left = i64_into_bytes(*left); - let right = i64_into_bytes(*right); - - let mut bytes = Vec::with_capacity(2 + left.len() + right.len()); - bytes.push(*field_id); - bytes.push(*level); - bytes.extend_from_slice(&left[..]); - if *level != 0 { - bytes.extend_from_slice(&right[..]); - } - - Some(Cow::Owned(bytes)) - } -} diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_i64_codec.rs b/milli/src/heed_codec/facet/field_doc_id_facet_i64_codec.rs deleted file mode 100644 index a9eaf188c..000000000 --- a/milli/src/heed_codec/facet/field_doc_id_facet_i64_codec.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::borrow::Cow; -use std::convert::TryInto; - -use crate::facet::value_encoding::{i64_into_bytes, i64_from_bytes}; -use crate::{FieldId, DocumentId}; - -pub struct FieldDocIdFacetI64Codec; - -impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetI64Codec { - type DItem = (FieldId, DocumentId, i64); - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id, bytes) = bytes.split_first()?; - - let (document_id_bytes, bytes) = bytes.split_at(4); - let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?; - - let value = bytes[..8].try_into().map(i64_from_bytes).ok()?; - - Some((*field_id, document_id, value)) - } -} - -impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetI64Codec { - type EItem = (FieldId, DocumentId, i64); - - fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(1 + 4 + 8); - bytes.push(*field_id); - bytes.extend_from_slice(&document_id.to_be_bytes()); - bytes.extend_from_slice(&i64_into_bytes(*value)); - Some(Cow::Owned(bytes)) - } -} diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index d8ce936e0..532da12fa 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -1,13 +1,9 @@ mod facet_level_value_f64_codec; -mod facet_level_value_i64_codec; mod facet_value_string_codec; mod field_doc_id_facet_f64_codec; -mod field_doc_id_facet_i64_codec; mod field_doc_id_facet_string_codec; pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; -pub use self::facet_level_value_i64_codec::FacetLevelValueI64Codec; pub use self::facet_value_string_codec::FacetValueStringCodec; pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; -pub use self::field_doc_id_facet_i64_codec::FieldDocIdFacetI64Codec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 78ae540e4..1dc186720 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -2,16 +2,13 @@ use std::collections::HashMap; use std::mem::take; use anyhow::{bail, Context as _}; -use heed::{BytesDecode, BytesEncode}; use itertools::Itertools; use log::debug; -use num_traits::Bounded; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec}; -use crate::heed_codec::facet::{FieldDocIdFacetI64Codec, FieldDocIdFacetF64Codec}; +use crate::heed_codec::facet::FieldDocIdFacetF64Codec; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::FacetIter; use crate::search::query_tree::Operation; @@ -253,33 +250,17 @@ fn facet_ordered<'t>( ) -> anyhow::Result> + 't>> { match facet_type { - FacetType::Float => { + FacetType::Number => { if candidates.len() <= CANDIDATES_THRESHOLD { - let iter = iterative_facet_ordered_iter::>( + let iter = iterative_facet_ordered_iter( index, rtxn, field_id, ascending, candidates, )?; Ok(Box::new(iter.map(Ok)) as Box>) } else { let facet_fn = if ascending { - FacetIter::::new_reducing + FacetIter::new_reducing } else { - FacetIter::::new_reverse_reducing - }; - let iter = facet_fn(rtxn, index, field_id, candidates)?; - Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) - } - }, - FacetType::Integer => { - if candidates.len() <= CANDIDATES_THRESHOLD { - let iter = iterative_facet_ordered_iter::( - index, rtxn, field_id, ascending, candidates, - )?; - Ok(Box::new(iter.map(Ok)) as Box>) - } else { - let facet_fn = if ascending { - FacetIter::::new_reducing - } else { - FacetIter::::new_reverse_reducing + FacetIter::new_reverse_reducing }; let iter = facet_fn(rtxn, index, field_id, candidates)?; Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) @@ -292,28 +273,23 @@ fn facet_ordered<'t>( /// Fetch the whole list of candidates facet values one by one and order them by it. /// /// This function is fast when the amount of candidates to rank is small. -fn iterative_facet_ordered_iter<'t, KC, T, U>( +fn iterative_facet_ordered_iter<'t>( index: &'t Index, rtxn: &'t heed::RoTxn, field_id: FieldId, ascending: bool, candidates: RoaringBitmap, ) -> anyhow::Result + 't> -where - KC: BytesDecode<'t, DItem = (FieldId, u32, T)>, - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u32, T)>, - T: Bounded, - U: From + Ord + Clone + 't, { - let db = index.field_id_docid_facet_values.remap_key_type::(); + let db = index.field_id_docid_facet_values.remap_key_type::(); let mut docids_values = Vec::with_capacity(candidates.len() as usize); for docid in candidates.iter() { - let left = (field_id, docid, T::min_value()); - let right = (field_id, docid, T::max_value()); + let left = (field_id, docid, f64::MIN); + let right = (field_id, docid, f64::MAX); let mut iter = db.range(rtxn, &(left..=right))?; let entry = if ascending { iter.next() } else { iter.last() }; if let Some(((_, _, value), ())) = entry.transpose()? { - docids_values.push((docid, U::from(value))); + docids_values.push((docid, OrderedFloat(value))); } } docids_values.sort_unstable_by_key(|(_, v)| v.clone()); diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index e97f8b922..3c508b25b 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -81,28 +81,7 @@ impl<'a> FacetDistinctIter<'a> { Ok(()) } - fn distinct_integer(&mut self, id: DocumentId) -> anyhow::Result<()> { - let iter = get_facet_values::( - id, - self.distinct, - self.index, - self.txn, - )?; - - for item in iter { - let ((_, _, value), _) = item?; - // get facet docids on level 0 - let key = (self.distinct, 0, value, value); - let facet_docids = self.get_facet_docids::(&key)?; - self.excluded.union_with(&facet_docids); - } - - self.excluded.remove(id); - - Ok(()) - } - - fn distinct_float(&mut self, id: DocumentId) -> anyhow::Result<()> { + fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> { let iter = get_facet_values::(id, self.distinct, self.index, @@ -134,8 +113,7 @@ impl<'a> FacetDistinctIter<'a> { Some(id) => { match self.facet_type { FacetType::String => self.distinct_string(id)?, - FacetType::Integer => self.distinct_integer(id)?, - FacetType::Float => self.distinct_float(id)?, + FacetType::Number => self.distinct_number(id)?, }; // The first document of each iteration is kept, since the next call to @@ -233,6 +211,5 @@ mod test { test_facet_distinct!(test_string, "txt", FacetType::String); test_facet_distinct!(test_strings, "txts", FacetType::String); - test_facet_distinct!(test_int, "cat-int", FacetType::Integer); - test_facet_distinct!(test_ints, "cat-ints", FacetType::Integer); + test_facet_distinct!(test_number, "cat-int", FacetType::Number); } diff --git a/milli/src/search/facet/facet_condition.rs b/milli/src/search/facet/facet_condition.rs index 42c2327a9..525450ee1 100644 --- a/milli/src/search/facet/facet_condition.rs +++ b/milli/src/search/facet/facet_condition.rs @@ -5,17 +5,15 @@ use std::str::FromStr; use anyhow::Context; use either::Either; -use heed::types::{ByteSlice, DecodeIgnore}; +use heed::types::DecodeIgnore; use log::debug; -use num_traits::Bounded; use pest::error::{Error as PestError, ErrorVariant}; use pest::iterators::{Pair, Pairs}; use pest::Parser; use roaring::RoaringBitmap; use crate::facet::FacetType; -use crate::heed_codec::facet::FacetValueStringCodec; -use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; +use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; use super::FacetRange; @@ -26,17 +24,17 @@ use self::FacetCondition::*; use self::FacetNumberOperator::*; #[derive(Debug, Copy, Clone, PartialEq)] -pub enum FacetNumberOperator { - GreaterThan(T), - GreaterThanOrEqual(T), - Equal(T), - NotEqual(T), - LowerThan(T), - LowerThanOrEqual(T), - Between(T, T), +pub enum FacetNumberOperator { + GreaterThan(f64), + GreaterThanOrEqual(f64), + Equal(f64), + NotEqual(f64), + LowerThan(f64), + LowerThanOrEqual(f64), + Between(f64, f64), } -impl FacetNumberOperator { +impl FacetNumberOperator { /// This method can return two operations in case it must express /// an OR operation for the between case (i.e. `TO`). fn negate(self) -> (Self, Option) { @@ -78,9 +76,8 @@ impl FacetStringOperator { #[derive(Debug, Clone, PartialEq)] pub enum FacetCondition { - OperatorI64(FieldId, FacetNumberOperator), - OperatorF64(FieldId, FacetNumberOperator), OperatorString(FieldId, FacetStringOperator), + OperatorNumber(FieldId, FacetNumberOperator), Or(Box, Box), And(Box, Box), } @@ -173,8 +170,7 @@ impl FacetCondition { let operator = match ftype { FacetType::String => OperatorString(fid, FacetStringOperator::equal(value)), - FacetType::Float => OperatorF64(fid, FacetNumberOperator::Equal(value.parse()?)), - FacetType::Integer => OperatorI64(fid, FacetNumberOperator::Equal(value.parse()?)), + FacetType::Number => OperatorNumber(fid, FacetNumberOperator::Equal(value.parse()?)), }; if neg { Ok(operator.negate()) } else { Ok(operator) } @@ -267,15 +263,11 @@ impl FacetCondition { fn negate(self) -> FacetCondition { match self { - OperatorI64(fid, op) => match op.negate() { - (op, None) => OperatorI64(fid, op), - (a, Some(b)) => Or(Box::new(OperatorI64(fid, a)), Box::new(OperatorI64(fid, b))), - }, - OperatorF64(fid, op) => match op.negate() { - (op, None) => OperatorF64(fid, op), - (a, Some(b)) => Or(Box::new(OperatorF64(fid, a)), Box::new(OperatorF64(fid, b))), - }, OperatorString(fid, op) => OperatorString(fid, op.negate()), + OperatorNumber(fid, op) => match op.negate() { + (op, None) => OperatorNumber(fid, op), + (a, Some(b)) => Or(Box::new(OperatorNumber(fid, a)), Box::new(OperatorNumber(fid, b))), + }, Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), } @@ -293,16 +285,6 @@ impl FacetCondition { let lvalue = items.next().unwrap(); let rvalue = items.next().unwrap(); match ftype { - FacetType::Integer => { - let lvalue = pest_parse(lvalue)?; - let rvalue = pest_parse(rvalue)?; - Ok(OperatorI64(fid, Between(lvalue, rvalue))) - }, - FacetType::Float => { - let lvalue = pest_parse(lvalue)?; - let rvalue = pest_parse(rvalue)?; - Ok(OperatorF64(fid, Between(lvalue, rvalue))) - }, FacetType::String => { Err(PestError::::new_from_span( ErrorVariant::CustomError { @@ -311,6 +293,11 @@ impl FacetCondition { item_span, ).into()) }, + FacetType::Number => { + let lvalue = pest_parse(lvalue)?; + let rvalue = pest_parse(rvalue)?; + Ok(OperatorNumber(fid, Between(lvalue, rvalue))) + }, } } @@ -324,9 +311,8 @@ impl FacetCondition { let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let value = items.next().unwrap(); match ftype { - FacetType::Integer => Ok(OperatorI64(fid, Equal(pest_parse(value)?))), - FacetType::Float => Ok(OperatorF64(fid, Equal(pest_parse(value)?))), FacetType::String => Ok(OperatorString(fid, FacetStringOperator::equal(value.as_str()))), + FacetType::Number => Ok(OperatorNumber(fid, Equal(pest_parse(value)?))), } } @@ -341,8 +327,6 @@ impl FacetCondition { let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let value = items.next().unwrap(); match ftype { - FacetType::Integer => Ok(OperatorI64(fid, GreaterThan(pest_parse(value)?))), - FacetType::Float => Ok(OperatorF64(fid, GreaterThan(pest_parse(value)?))), FacetType::String => { Err(PestError::::new_from_span( ErrorVariant::CustomError { @@ -351,6 +335,7 @@ impl FacetCondition { item_span, ).into()) }, + FacetType::Number => Ok(OperatorNumber(fid, GreaterThan(pest_parse(value)?))), } } @@ -365,8 +350,6 @@ impl FacetCondition { let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let value = items.next().unwrap(); match ftype { - FacetType::Integer => Ok(OperatorI64(fid, GreaterThanOrEqual(pest_parse(value)?))), - FacetType::Float => Ok(OperatorF64(fid, GreaterThanOrEqual(pest_parse(value)?))), FacetType::String => { Err(PestError::::new_from_span( ErrorVariant::CustomError { @@ -375,6 +358,7 @@ impl FacetCondition { item_span, ).into()) }, + FacetType::Number => Ok(OperatorNumber(fid, GreaterThanOrEqual(pest_parse(value)?))), } } @@ -389,8 +373,6 @@ impl FacetCondition { let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let value = items.next().unwrap(); match ftype { - FacetType::Integer => Ok(OperatorI64(fid, LowerThan(pest_parse(value)?))), - FacetType::Float => Ok(OperatorF64(fid, LowerThan(pest_parse(value)?))), FacetType::String => { Err(PestError::::new_from_span( ErrorVariant::CustomError { @@ -399,6 +381,7 @@ impl FacetCondition { item_span, ).into()) }, + FacetType::Number => Ok(OperatorNumber(fid, LowerThan(pest_parse(value)?))), } } @@ -413,8 +396,6 @@ impl FacetCondition { let (fid, ftype) = get_field_id_facet_type(fields_ids_map, faceted_fields, &mut items)?; let value = items.next().unwrap(); match ftype { - FacetType::Integer => Ok(OperatorI64(fid, LowerThanOrEqual(pest_parse(value)?))), - FacetType::Float => Ok(OperatorF64(fid, LowerThanOrEqual(pest_parse(value)?))), FacetType::String => { Err(PestError::::new_from_span( ErrorVariant::CustomError { @@ -423,6 +404,7 @@ impl FacetCondition { item_span, ).into()) }, + FacetType::Number => Ok(OperatorNumber(fid, LowerThanOrEqual(pest_parse(value)?))), } } } @@ -430,24 +412,20 @@ impl FacetCondition { impl FacetCondition { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. - fn explore_facet_levels<'t, T: 't, KC>( - rtxn: &'t heed::RoTxn, - db: heed::Database, + fn explore_facet_number_levels( + rtxn: &heed::RoTxn, + db: heed::Database, field_id: FieldId, level: u8, - left: Bound, - right: Bound, + left: Bound, + right: Bound, output: &mut RoaringBitmap, ) -> anyhow::Result<()> - where - T: Copy + PartialEq + PartialOrd + Bounded + Debug, - KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, - KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, { match (left, right) { // If the request is an exact value we must go directly to the deepest level. (Included(l), Included(r)) if l == r && level > 0 => { - return Self::explore_facet_levels::(rtxn, db, field_id, 0, left, right, output); + return Self::explore_facet_number_levels(rtxn, db, field_id, 0, left, right, output); }, // lower TO upper when lower > upper must return no result (Included(l), Included(r)) if l > r => return Ok(()), @@ -462,7 +440,7 @@ impl FacetCondition { // We must create a custom iterator to be able to iterate over the // requested range as the range iterator cannot express some conditions. - let iter = FacetRange::new(rtxn, db.remap_key_type::(), field_id, level, left, right)?; + let iter = FacetRange::new(rtxn, db, field_id, level, left, right)?; debug!("Iterating between {:?} and {:?} (level {})", left, right, level); @@ -489,64 +467,60 @@ impl FacetCondition { if !matches!(left, Included(l) if l == left_found) { let sub_right = Excluded(left_found); debug!("calling left with {:?} to {:?} (level {})", left, sub_right, deeper_level); - Self::explore_facet_levels::(rtxn, db, field_id, deeper_level, left, sub_right, output)?; + Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, sub_right, output)?; } if !matches!(right, Included(r) if r == right_found) { let sub_left = Excluded(right_found); debug!("calling right with {:?} to {:?} (level {})", sub_left, right, deeper_level); - Self::explore_facet_levels::(rtxn, db, field_id, deeper_level, sub_left, right, output)?; + Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, sub_left, right, output)?; } }, None => { // If we found nothing at this level it means that we must find // the same bounds but at a deeper, more precise level. - Self::explore_facet_levels::(rtxn, db, field_id, deeper_level, left, right, output)?; + Self::explore_facet_number_levels(rtxn, db, field_id, deeper_level, left, right, output)?; }, } Ok(()) } - fn evaluate_number_operator<'t, T: 't, KC>( - rtxn: &'t heed::RoTxn, + fn evaluate_number_operator<>( + rtxn: &heed::RoTxn, index: &Index, - db: heed::Database, + db: heed::Database, field_id: FieldId, - operator: FacetNumberOperator, + operator: FacetNumberOperator, ) -> anyhow::Result - where - T: Copy + PartialEq + PartialOrd + Bounded + Debug, - KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, - KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the // field id and the level. let (left, right) = match operator { - GreaterThan(val) => (Excluded(val), Included(T::max_value())), - GreaterThanOrEqual(val) => (Included(val), Included(T::max_value())), - Equal(val) => (Included(val), Included(val)), + GreaterThan(val) => (Excluded(val), Included(f64::MAX)), + GreaterThanOrEqual(val) => (Included(val), Included(f64::MAX)), + Equal(val) => (Included(val), Included(val)), NotEqual(val) => { let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?; - let docids = Self::evaluate_number_operator::(rtxn, index, db, field_id, Equal(val))?; + let docids = Self::evaluate_number_operator(rtxn, index, db, field_id, Equal(val))?; return Ok(all_documents_ids - docids); }, - LowerThan(val) => (Included(T::min_value()), Excluded(val)), - LowerThanOrEqual(val) => (Included(T::min_value()), Included(val)), - Between(left, right) => (Included(left), Included(right)), + LowerThan(val) => (Included(f64::MIN), Excluded(val)), + LowerThanOrEqual(val) => (Included(f64::MIN), Included(val)), + Between(left, right) => (Included(left), Included(right)), }; // Ask for the biggest value that can exist for this specific field, if it exists // that's fine if it don't, the value just before will be returned instead. let biggest_level = db - .remap_types::() - .get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, T::max_value(), T::max_value()))? + .remap_data_type::() + .get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, f64::MAX, f64::MAX))? .and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None }); match biggest_level { Some(level) => { let mut output = RoaringBitmap::new(); - Self::explore_facet_levels::(rtxn, db, field_id, level, left, right, &mut output)?; + Self::explore_facet_number_levels(rtxn, db, field_id, level, left, right, &mut output)?; Ok(output) }, None => Ok(RoaringBitmap::new()), @@ -585,16 +559,14 @@ impl FacetCondition { { let db = index.facet_field_id_value_docids; match self { - OperatorI64(fid, op) => { - Self::evaluate_number_operator::(rtxn, index, db, *fid, *op) - }, - OperatorF64(fid, op) => { - Self::evaluate_number_operator::(rtxn, index, db, *fid, *op) - }, OperatorString(fid, op) => { let db = db.remap_key_type::(); Self::evaluate_string_operator(rtxn, index, db, *fid, op) }, + OperatorNumber(fid, op) => { + let db = db.remap_key_type::(); + Self::evaluate_number_operator(rtxn, index, db, *fid, *op) + }, Or(lhs, rhs) => { let lhs = lhs.evaluate(rtxn, index)?; let rhs = rhs.evaluate(rtxn, index)?; @@ -646,7 +618,7 @@ mod tests { } #[test] - fn i64() { + fn number() { let path = tempfile::tempdir().unwrap(); let mut options = EnvOpenOptions::new(); options.map_size(10 * 1024 * 1024); // 10 MB @@ -655,20 +627,20 @@ mod tests { // Set the faceted fields to be the channel. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashmap!{ "timestamp".into() => "integer".into() }); + builder.set_faceted_fields(hashmap!{ "timestamp".into() => "number".into() }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); // Test that the facet condition is correctly generated. let rtxn = index.read_txn().unwrap(); let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); - let expected = OperatorI64(0, Between(22, 44)); + let expected = OperatorNumber(0, Between(22.0, 44.0)); assert_eq!(condition, expected); let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); let expected = Or( - Box::new(OperatorI64(0, LowerThan(22))), - Box::new(OperatorI64(0, GreaterThan(44))), + Box::new(OperatorNumber(0, LowerThan(22.0))), + Box::new(OperatorNumber(0, GreaterThan(44.0))), ); assert_eq!(condition, expected); } @@ -686,7 +658,7 @@ mod tests { builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order builder.set_faceted_fields(hashmap!{ "channel".into() => "string".into(), - "timestamp".into() => "integer".into(), + "timestamp".into() => "number".into(), }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -700,7 +672,7 @@ mod tests { let expected = Or( Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))), Box::new(And( - Box::new(OperatorI64(1, Between(22, 44))), + Box::new(OperatorNumber(1, Between(22.0, 44.0))), Box::new(OperatorString(0, FacetStringOperator::not_equal("ponce"))), )) ); @@ -714,8 +686,8 @@ mod tests { Box::new(OperatorString(0, FacetStringOperator::equal("gotaga"))), Box::new(Or( Box::new(Or( - Box::new(OperatorI64(1, LowerThan(22))), - Box::new(OperatorI64(1, GreaterThan(44))), + Box::new(OperatorNumber(1, LowerThan(22.0))), + Box::new(OperatorNumber(1, GreaterThan(44.0))), )), Box::new(OperatorString(0, FacetStringOperator::equal("ponce"))), )), @@ -736,7 +708,7 @@ mod tests { builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order builder.set_faceted_fields(hashmap!{ "channel".into() => "string".into(), - "timestamp".into() => "integer".into(), + "timestamp".into() => "number".into(), }); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index afa4f2a5a..7fd2d385b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -7,8 +7,8 @@ use heed::BytesDecode; use roaring::RoaringBitmap; use crate::facet::{FacetType, FacetValue}; -use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; -use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; +use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; +use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::search::facet::{FacetIter, FacetRange}; use crate::{Index, FieldId, DocumentId}; @@ -102,12 +102,9 @@ impl<'a> FacetDistribution<'a> { FacetType::String => { fetch_facet_values::(index, rtxn, field_id, candidates) }, - FacetType::Float => { + FacetType::Number => { fetch_facet_values::(index, rtxn, field_id, candidates) }, - FacetType::Integer => { - fetch_facet_values::(index, rtxn, field_id, candidates) - }, } } @@ -122,18 +119,11 @@ impl<'a> FacetDistribution<'a> { { let iter = match facet_type { FacetType::String => unreachable!(), - FacetType::Float => { - let iter = FacetIter::::new_non_reducing( + FacetType::Number => { + let iter = FacetIter::new_non_reducing( self.rtxn, self.index, field_id, candidates.clone(), )?; - let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))); - Box::new(iter) as Box::> - }, - FacetType::Integer => { - let iter = FacetIter::::new_non_reducing( - self.rtxn, self.index, field_id, candidates.clone(), - )?; - Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)))) + iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))) }, }; @@ -170,16 +160,9 @@ impl<'a> FacetDistribution<'a> { .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); Box::new(iter) as Box::> }, - FacetType::Float => { + FacetType::Number => { let db = db.remap_key_type::(); - let range = FacetRange::::new( - self.rtxn, db, field_id, level, Unbounded, Unbounded, - )?; - Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) - }, - FacetType::Integer => { - let db = db.remap_key_type::(); - let range = FacetRange::::new( + let range = FacetRange::new( self.rtxn, db, field_id, level, Unbounded, Unbounded, )?; Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index e5b06185f..0252af963 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,15 +1,12 @@ -use std::fmt::Debug; use std::ops::Bound::{self, Included, Excluded, Unbounded}; use either::Either::{self, Left, Right}; use heed::types::{DecodeIgnore, ByteSlice}; -use heed::{BytesEncode, BytesDecode}; use heed::{Database, RoRange, RoRevRange, LazyDecode}; -use log::debug; -use num_traits::Bounded; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; +use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::{Index, FieldId}; pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator}; @@ -19,43 +16,34 @@ mod facet_condition; mod facet_distribution; mod parser; -pub struct FacetRange<'t, T: 't, KC> { - iter: RoRange<'t, KC, LazyDecode>, - end: Bound, +pub struct FacetRange<'t> { + iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode>, + end: Bound, } -impl<'t, T: 't, KC> FacetRange<'t, T, KC> -where - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy + Bounded, -{ +impl<'t> FacetRange<'t> { pub fn new( rtxn: &'t heed::RoTxn, - db: Database, + db: Database, field_id: FieldId, level: u8, - left: Bound, - right: Bound, - ) -> heed::Result> + left: Bound, + right: Bound, + ) -> heed::Result> { let left_bound = match left { - Included(left) => Included((field_id, level, left, T::min_value())), - Excluded(left) => Excluded((field_id, level, left, T::min_value())), - Unbounded => Included((field_id, level, T::min_value(), T::min_value())), + Included(left) => Included((field_id, level, left, f64::MIN)), + Excluded(left) => Excluded((field_id, level, left, f64::MIN)), + Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), }; - let right_bound = Included((field_id, level, T::max_value(), T::max_value())); + let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?; Ok(FacetRange { iter, end: right }) } } -impl<'t, T, KC> Iterator for FacetRange<'t, T, KC> -where - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, - KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy, -{ - type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>; +impl<'t> Iterator for FacetRange<'t> { + type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; fn next(&mut self) -> Option { match self.iter.next() { @@ -80,43 +68,34 @@ where } } -pub struct FacetRevRange<'t, T: 't, KC> { - iter: RoRevRange<'t, KC, LazyDecode>, - end: Bound, +pub struct FacetRevRange<'t> { + iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode>, + end: Bound, } -impl<'t, T: 't, KC> FacetRevRange<'t, T, KC> -where - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy + Bounded, -{ +impl<'t> FacetRevRange<'t> { pub fn new( rtxn: &'t heed::RoTxn, - db: Database, + db: Database, field_id: FieldId, level: u8, - left: Bound, - right: Bound, - ) -> heed::Result> + left: Bound, + right: Bound, + ) -> heed::Result> { let left_bound = match left { - Included(left) => Included((field_id, level, left, T::min_value())), - Excluded(left) => Excluded((field_id, level, left, T::min_value())), - Unbounded => Included((field_id, level, T::min_value(), T::min_value())), + Included(left) => Included((field_id, level, left, f64::MIN)), + Excluded(left) => Excluded((field_id, level, left, f64::MIN)), + Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), }; - let right_bound = Included((field_id, level, T::max_value(), T::max_value())); + let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; Ok(FacetRevRange { iter, end: right }) } } -impl<'t, T, KC> Iterator for FacetRevRange<'t, T, KC> -where - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, - KC: BytesDecode<'t, DItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy, -{ - type Item = heed::Result<((FieldId, u8, T, T), RoaringBitmap)>; +impl<'t> Iterator for FacetRevRange<'t> { + type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; fn next(&mut self) -> Option { loop { @@ -142,20 +121,15 @@ where } } -pub struct FacetIter<'t, T: 't, KC> { +pub struct FacetIter<'t> { rtxn: &'t heed::RoTxn<'t>, - db: Database, + db: Database, field_id: FieldId, - level_iters: Vec<(RoaringBitmap, Either, FacetRevRange<'t, T, KC>>)>, + level_iters: Vec<(RoaringBitmap, Either, FacetRevRange<'t>>)>, must_reduce: bool, } -impl<'t, T, KC> FacetIter<'t, T, KC> -where - KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, - KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy + Bounded, -{ +impl<'t> FacetIter<'t> { /// Create a `FacetIter` that will iterate on the different facet entries /// (facet value + documents ids) and that will reduce the given documents ids /// while iterating on the different facet levels. @@ -164,9 +138,9 @@ where index: &'t Index, field_id: FieldId, documents_ids: RoaringBitmap, - ) -> heed::Result> + ) -> heed::Result> { - let db = index.facet_field_id_value_docids.remap_key_type::(); + let db = index.facet_field_id_value_docids.remap_key_type::(); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let level_iters = vec![(documents_ids, Left(highest_iter))]; @@ -181,9 +155,9 @@ where index: &'t Index, field_id: FieldId, documents_ids: RoaringBitmap, - ) -> heed::Result> + ) -> heed::Result> { - let db = index.facet_field_id_value_docids.remap_key_type::(); + let db = index.facet_field_id_value_docids.remap_key_type::(); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let level_iters = vec![(documents_ids, Right(highest_iter))]; @@ -199,32 +173,32 @@ where index: &'t Index, field_id: FieldId, documents_ids: RoaringBitmap, - ) -> heed::Result> + ) -> heed::Result> { - let db = index.facet_field_id_value_docids.remap_key_type::(); + let db = index.facet_field_id_value_docids.remap_key_type::(); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; let level_iters = vec![(documents_ids, Left(highest_iter))]; Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false }) } - fn highest_level(rtxn: &'t heed::RoTxn, db: Database, fid: FieldId) -> heed::Result> { + fn highest_level( + rtxn: &'t heed::RoTxn, + db: Database, + fid: FieldId, + ) -> heed::Result> + { let level = db.remap_types::() .prefix_iter(rtxn, &[fid][..])? - .remap_key_type::() + .remap_key_type::() .last().transpose()? .map(|((_, level, _, _), _)| level); Ok(level) } } -impl<'t, T: 't, KC> Iterator for FacetIter<'t, T, KC> -where - KC: heed::BytesDecode<'t, DItem = (FieldId, u8, T, T)>, - KC: for<'x> heed::BytesEncode<'x, EItem = (FieldId, u8, T, T)>, - T: PartialOrd + Copy + Bounded + Debug, -{ - type Item = heed::Result<(T, RoaringBitmap)>; +impl<'t> Iterator for FacetIter<'t> { + type Item = heed::Result<(f64, RoaringBitmap)>; fn next(&mut self) -> Option { 'outer: loop { @@ -248,7 +222,6 @@ where } if level == 0 { - debug!("found {:?} at {:?}", docids, left); return Some(Ok((left, docids))); } @@ -258,10 +231,6 @@ where let left = Included(left); let right = Included(right); - debug!("calling with {:?} to {:?} (level {}) to find {:?}", - left, right, level - 1, docids, - ); - let result = if is_ascending { FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left) } else { diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 4c5bf0a8a..8a2ba9bbf 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -10,7 +10,7 @@ use serde_json::Value; use crate::facet::FacetType; use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds}; -use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; +use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use super::ClearDocuments; pub struct DeleteDocuments<'t, 'u, 'i> { @@ -302,7 +302,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } } }, - FacetType::Float => { + FacetType::Number => { let mut iter = iter.remap_key_type::(); while let Some(result) = iter.next() { let ((_fid, docid, _value), ()) = result?; @@ -311,15 +311,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } } }, - FacetType::Integer => { - let mut iter = iter.remap_key_type::(); - while let Some(result) = iter.next() { - let ((_fid, docid, _value), ()) = result?; - if self.documents_ids.contains(docid) { - iter.del_current()?; - } - } - }, } } diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index 62da5af7e..b9e4d7488 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -7,12 +7,11 @@ use grenad::{CompressionType, Reader, Writer, FileFuse}; use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesEncode, Error}; use log::debug; -use num_traits::{Bounded, Zero}; use roaring::RoaringBitmap; use crate::facet::FacetType; use crate::heed_codec::CboRoaringBitmapCodec; -use crate::heed_codec::facet::{FacetLevelValueI64Codec, FacetLevelValueF64Codec}; +use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::Index; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; @@ -65,58 +64,6 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { debug!("Computing and writing the facet values levels docids into LMDB on disk..."); for (field_id, facet_type) in faceted_fields { let (content, documents_ids) = match facet_type { - FacetType::Integer => { - clear_field_levels::( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; - - let documents_ids = compute_faceted_documents_ids( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; - - let content = compute_facet_levels::( - self.wtxn, - self.index.facet_field_id_value_docids, - self.chunk_compression_type, - self.chunk_compression_level, - self.chunk_fusing_shrink_size, - self.level_group_size, - self.min_level_size, - field_id, - )?; - - (Some(content), documents_ids) - }, - FacetType::Float => { - clear_field_levels::( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; - - let documents_ids = compute_faceted_documents_ids( - self.wtxn, - self.index.facet_field_id_value_docids, - field_id, - )?; - - let content = compute_facet_levels::( - self.wtxn, - self.index.facet_field_id_value_docids, - self.chunk_compression_type, - self.chunk_compression_level, - self.chunk_fusing_shrink_size, - self.level_group_size, - self.min_level_size, - field_id, - )?; - - (Some(content), documents_ids) - }, FacetType::String => { let documents_ids = compute_faceted_documents_ids( self.wtxn, @@ -126,6 +73,32 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { (None, documents_ids) }, + FacetType::Number => { + clear_field_number_levels( + self.wtxn, + self.index.facet_field_id_value_docids.remap_key_type::(), + field_id, + )?; + + let documents_ids = compute_faceted_documents_ids( + self.wtxn, + self.index.facet_field_id_value_docids, + field_id, + )?; + + let content = compute_facet_number_levels( + self.wtxn, + self.index.facet_field_id_value_docids.remap_key_type::(), + self.chunk_compression_type, + self.chunk_compression_level, + self.chunk_fusing_shrink_size, + self.level_group_size, + self.min_level_size, + field_id, + )?; + + (Some(content), documents_ids) + }, }; if let Some(content) = content { @@ -145,25 +118,21 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { } } -fn clear_field_levels<'t, T: 't, KC>( +fn clear_field_number_levels<'t, >( wtxn: &'t mut heed::RwTxn, - db: heed::Database, + db: heed::Database, field_id: u8, ) -> heed::Result<()> -where - T: Copy + Bounded, - KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, - KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, { - let left = (field_id, 1, T::min_value(), T::min_value()); - let right = (field_id, u8::MAX, T::max_value(), T::max_value()); + let left = (field_id, 1, f64::MIN, f64::MIN); + let right = (field_id, u8::MAX, f64::MAX, f64::MAX); let range = left..=right; - db.remap_key_type::().delete_range(wtxn, &range).map(drop) + db.delete_range(wtxn, &range).map(drop) } -fn compute_facet_levels<'t, T: 't, KC>( +fn compute_facet_number_levels<'t>( rtxn: &'t heed::RoTxn, - db: heed::Database, + db: heed::Database, compression_type: CompressionType, compression_level: Option, shrink_size: Option, @@ -171,12 +140,10 @@ fn compute_facet_levels<'t, T: 't, KC>( min_level_size: NonZeroUsize, field_id: u8, ) -> anyhow::Result> -where - T: Copy + PartialEq + PartialOrd + Bounded + Zero, - KC: heed::BytesDecode<'t, DItem = (u8, u8, T, T)>, - KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, { - let first_level_size = db.prefix_iter(rtxn, &[field_id])? + let first_level_size = db + .remap_key_type::() + .prefix_iter(rtxn, &[field_id])? .remap_types::() .fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?; @@ -187,8 +154,8 @@ where })?; let level_0_range = { - let left = (field_id, 0, T::min_value(), T::min_value()); - let right = (field_id, 0, T::max_value(), T::max_value()); + let left = (field_id, 0, f64::MIN, f64::MIN); + let right = (field_id, 0, f64::MAX, f64::MAX); left..=right }; @@ -199,11 +166,10 @@ where .take_while(|(_, s)| first_level_size / *s >= min_level_size.get()); for (level, group_size) in group_size_iter { - let mut left = T::zero(); - let mut right = T::zero(); + let mut left = 0.0; + let mut right = 0.0; let mut group_docids = RoaringBitmap::new(); - let db = db.remap_key_type::(); for (i, result) in db.range(rtxn, &level_0_range)?.enumerate() { let ((_field_id, _level, value, _right), docids) = result?; @@ -212,7 +178,7 @@ where } else if i % group_size == 0 { // we found the first bound of the next group, we must store the left // and right bounds associated with the docids. - write_entry::(&mut writer, field_id, level, left, right, &group_docids)?; + write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?; // We save the left bound for the new group and also reset the docids. group_docids = RoaringBitmap::new(); @@ -225,7 +191,7 @@ where } if !group_docids.is_empty() { - write_entry::(&mut writer, field_id, level, left, right, &group_docids)?; + write_number_entry(&mut writer, field_id, level, left, right, &group_docids)?; } } @@ -246,19 +212,17 @@ fn compute_faceted_documents_ids( Ok(documents_ids) } -fn write_entry( +fn write_number_entry( writer: &mut Writer, field_id: u8, level: u8, - left: T, - right: T, + left: f64, + right: f64, ids: &RoaringBitmap, ) -> anyhow::Result<()> -where - KC: for<'x> heed::BytesEncode<'x, EItem = (u8, u8, T, T)>, { let key = (field_id, level, left, right); - let key = KC::bytes_encode(&key).ok_or(Error::Encoding)?; + let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?; let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?; writer.insert(&key, &data)?; Ok(()) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index fb1a2d6c0..52949c13c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -440,6 +440,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { .enumerate() .map(|(i, documents)| { let store = Store::new( + primary_key.clone(), + fields_ids_map.clone(), searchable_fields.clone(), faceted_fields.clone(), linked_hash_map_size, diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 03d91af24..0bd83b692 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -12,19 +12,19 @@ use fst::Set; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; use heed::BytesEncode; use linked_hash_map::LinkedHashMap; -use log::{debug, info}; +use log::{debug, info, warn}; use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use serde_json::Value; use tempfile::tempfile; -use crate::facet::FacetType; -use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; -use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; +use crate::facet::{FacetType, FacetValue}; +use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; +use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::update::UpdateIndexingStep; -use crate::{json_to_string, SmallVec8, SmallVec32, SmallString32, Position, DocumentId, FieldId}; +use crate::{json_to_string, SmallVec8, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::merge_function::{ @@ -50,6 +50,8 @@ pub struct Readers { pub struct Store<'s, A> { // Indexing parameters + primary_key: String, + fields_ids_map: FieldsIdsMap, searchable_fields: HashSet, faceted_fields: HashMap, // Caches @@ -78,6 +80,8 @@ pub struct Store<'s, A> { impl<'s, A: AsRef<[u8]>> Store<'s, A> { pub fn new( + primary_key: String, + fields_ids_map: FieldsIdsMap, searchable_fields: HashSet, faceted_fields: HashMap, linked_hash_map_size: Option, @@ -149,6 +153,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(Store { // Indexing parameters. + primary_key, + fields_ids_map, searchable_fields, faceted_fields, // Caches @@ -365,8 +371,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { for ((field_id, value), docids) in iter { let result = match value { String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned), - Float(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned), - Integer(i) => FacetLevelValueI64Codec::bytes_encode(&(field_id, 0, i, i)).map(Cow::into_owned), + Number(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned), }; let key = result.context("could not serialize facet key")?; let bytes = CboRoaringBitmapCodec::bytes_encode(&docids) @@ -390,8 +395,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let result = match value { String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned), - Float(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned), - Integer(i) => FieldDocIdFacetI64Codec::bytes_encode(&(field_id, document_id, *i)).map(Cow::into_owned), + Number(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned), }; let key = result.context("could not serialize facet key")?; @@ -464,9 +468,26 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let value = serde_json::from_slice(content)?; if let Some(ftype) = self.faceted_fields.get(&attr) { - let mut values = parse_facet_value(*ftype, &value).with_context(|| { - format!("extracting facets from the value {}", value) - })?; + let mut values = match parse_facet_value(*ftype, &value) { + Ok(values) => values, + Err(e) => { + // We extract the name of the attribute and the document id + // to help users debug a facet type conversion. + let attr_name = self.fields_ids_map.name(attr).unwrap(); + let document_id: Value = self.fields_ids_map.id(&self.primary_key) + .and_then(|fid| document.get(fid)) + .map(serde_json::from_slice) + .unwrap()?; + + let context = format!( + "while extracting facet from the {:?} attribute in the {} document", + attr_name, document_id, + ); + warn!("{}", e.context(context)); + + SmallVec8::default() + }, + }; facet_values.entry(attr).or_insert_with(SmallVec8::new).extend(values.drain(..)); } @@ -605,13 +626,6 @@ fn lmdb_key_valid_size(key: &[u8]) -> bool { !key.is_empty() && key.len() <= LMDB_MAX_KEY_LENGTH } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -enum FacetValue { - String(SmallString32), - Float(OrderedFloat), - Integer(i64), -} - /// take an iterator on tokens and compute their relative position depending on separator kinds /// if it's an `Hard` separator we add an additional relative proximity of 8 between words, /// else we keep the standart proximity of 1 between words. @@ -654,54 +668,40 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result Ok(()), - Value::Bool(b) => { - output.push(Integer(*b as i64)); - Ok(()) + Value::Bool(b) => match ftype { + FacetType::String => { + output.push(String(b.to_string())); + Ok(()) + }, + FacetType::Number => { + output.push(Number(OrderedFloat(if *b { 1.0 } else { 0.0 }))); + Ok(()) + }, }, Value::Number(number) => match ftype { FacetType::String => { - let string = SmallString32::from(number.to_string()); - output.push(String(string)); + output.push(String(number.to_string())); Ok(()) }, - FacetType::Float => match number.as_f64() { + FacetType::Number => match number.as_f64() { Some(float) => { - output.push(Float(OrderedFloat(float))); + output.push(Number(OrderedFloat(float))); Ok(()) }, - None => bail!("invalid facet type, expecting {} found integer", ftype), - }, - FacetType::Integer => match number.as_i64() { - Some(integer) => { - output.push(Integer(integer)); - Ok(()) - }, - None => if number.is_f64() { - bail!("invalid facet type, expecting {} found float", ftype) - } else { - bail!("invalid facet type, expecting {} found out-of-bound integer (64bit)", ftype) - }, + None => bail!("invalid facet type, expecting {} found number", ftype), }, }, Value::String(string) => { + // TODO must be normalized and not only lowercased. let string = string.trim().to_lowercase(); - if string.is_empty() { return Ok(()) } match ftype { FacetType::String => { - let string = SmallString32::from(string); output.push(String(string)); Ok(()) }, - FacetType::Float => match string.parse() { + FacetType::Number => match string.parse() { Ok(float) => { - output.push(Float(OrderedFloat(float))); - Ok(()) - }, - Err(_err) => bail!("invalid facet type, expecting {} found string", ftype), - }, - FacetType::Integer => match string.parse() { - Ok(integer) => { - output.push(Integer(integer)); + output.push(Number(OrderedFloat(float))); Ok(()) }, Err(_err) => bail!("invalid facet type, expecting {} found string", ftype), @@ -711,7 +711,10 @@ fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result if can_recurse { values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect() } else { - bail!("invalid facet type, expecting {} found sub-array ()", ftype) + bail!( + "invalid facet type, expecting {} found array (recursive arrays are not supported)", + ftype, + ); }, Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype), } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index a0cfbd315..62aa8db97 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -619,7 +619,7 @@ mod tests { // Set the faceted fields to be the age. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashmap! { "age".into() => "integer".into() }); + builder.set_faceted_fields(hashmap!{ "age".into() => "number".into() }); builder.execute(|_, _| ()).unwrap(); // Then index some documents. @@ -632,7 +632,7 @@ mod tests { // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); let fields_ids = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashmap! { "age".to_string() => FacetType::Integer }); + assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Number }); // Only count the field_id 0 and level 0 facet values. let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); assert_eq!(count, 3); @@ -812,9 +812,9 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_displayed_fields(vec!["hello".to_string()]); - builder.set_faceted_fields(hashmap! { - "age".into() => "integer".into(), - "toto".into() => "integer".into(), + builder.set_faceted_fields(hashmap!{ + "age".into() => "number".into(), + "toto".into() => "number".into(), }); builder.set_criteria(vec!["asc(toto)".to_string()]); builder.execute(|_, _| ()).unwrap();