2021-08-31 17:44:15 +08:00
|
|
|
#[macro_use]
|
|
|
|
pub mod documents;
|
|
|
|
|
2021-09-22 21:18:39 +08:00
|
|
|
mod asc_desc;
|
2020-08-12 16:43:02 +08:00
|
|
|
mod criterion;
|
2021-06-09 23:05:46 +08:00
|
|
|
mod error;
|
2020-11-22 21:48:42 +08:00
|
|
|
mod external_documents_ids;
|
2020-11-11 22:48:24 +08:00
|
|
|
pub mod facet;
|
2021-06-17 00:33:33 +08:00
|
|
|
mod fields_ids_map;
|
2020-08-28 20:16:37 +08:00
|
|
|
pub mod heed_codec;
|
2021-02-15 01:55:15 +08:00
|
|
|
pub mod index;
|
2020-09-22 16:53:20 +08:00
|
|
|
pub mod proximity;
|
2021-06-17 00:33:33 +08:00
|
|
|
mod search;
|
2020-10-26 01:32:01 +08:00
|
|
|
pub mod update;
|
2020-06-05 02:25:51 +08:00
|
|
|
|
2021-06-17 23:05:34 +08:00
|
|
|
use std::collections::{BTreeMap, HashMap};
|
2021-07-06 17:31:24 +08:00
|
|
|
use std::convert::{TryFrom, TryInto};
|
2020-05-31 22:09:34 +08:00
|
|
|
use std::hash::BuildHasherDefault;
|
2020-10-31 23:10:15 +08:00
|
|
|
|
2020-06-30 04:25:59 +08:00
|
|
|
use fxhash::{FxHasher32, FxHasher64};
|
2021-08-25 00:15:31 +08:00
|
|
|
pub use grenad::CompressionType;
|
2020-11-05 20:34:15 +08:00
|
|
|
use serde_json::{Map, Value};
|
2020-06-05 02:25:51 +08:00
|
|
|
|
2021-09-28 01:07:22 +08:00
|
|
|
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
2021-09-22 22:02:07 +08:00
|
|
|
pub use self::criterion::{default_criteria, Criterion, CriterionError};
|
2021-06-17 21:16:20 +08:00
|
|
|
pub use self::error::{
|
|
|
|
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
|
|
|
};
|
2020-11-23 00:53:33 +08:00
|
|
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
2020-10-23 20:11:00 +08:00
|
|
|
pub use self::fields_ids_map::FieldsIdsMap;
|
2021-06-17 00:33:33 +08:00
|
|
|
pub use self::heed_codec::{
|
|
|
|
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
|
|
|
|
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
|
2021-10-05 17:18:42 +08:00
|
|
|
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec,
|
2021-06-17 00:33:33 +08:00
|
|
|
};
|
2020-10-21 21:55:48 +08:00
|
|
|
pub use self::index::Index;
|
2021-10-22 23:23:22 +08:00
|
|
|
pub use self::search::{
|
|
|
|
Condition, FacetDistribution, Filter, FilterCondition, MatchingWords, Search, SearchResult,
|
|
|
|
};
|
2020-05-31 22:09:34 +08:00
|
|
|
|
2021-06-14 22:46:19 +08:00
|
|
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
|
|
|
|
2020-05-31 22:09:34 +08:00
|
|
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
2020-06-30 04:25:59 +08:00
|
|
|
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
|
2020-05-31 22:09:34 +08:00
|
|
|
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
2020-06-11 17:55:03 +08:00
|
|
|
pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
|
2021-02-17 18:12:38 +08:00
|
|
|
pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
|
2020-11-13 21:49:48 +08:00
|
|
|
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
|
2020-05-31 22:09:34 +08:00
|
|
|
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
|
2020-10-18 21:16:57 +08:00
|
|
|
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
|
2020-08-06 17:08:24 +08:00
|
|
|
pub type Attribute = u32;
|
2020-11-27 00:38:08 +08:00
|
|
|
pub type DocumentId = u32;
|
2021-07-06 17:31:24 +08:00
|
|
|
pub type FieldId = u16;
|
2020-07-07 18:21:22 +08:00
|
|
|
pub type Position = u32;
|
2021-09-22 23:48:24 +08:00
|
|
|
pub type RelativePosition = u16;
|
2021-06-21 21:57:41 +08:00
|
|
|
pub type FieldDistribution = BTreeMap<String, u64>;
|
2021-08-23 22:32:11 +08:00
|
|
|
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 2], DocumentId>;
|
2020-10-31 23:10:15 +08:00
|
|
|
|
2021-09-22 23:48:24 +08:00
|
|
|
/// Number of distinct relative positions that fit in 16 bits, i.e. `u16::MAX + 1`.
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = 1 << 16;
|
|
|
|
|
|
|
|
// Convert an absolute word position into a relative position.
|
|
|
|
// Return the field id of the attribute related to the absolute position
|
|
|
|
// and the relative position in the attribute.
|
|
|
|
pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, RelativePosition) {
|
|
|
|
((absolute >> 16) as u16, (absolute & 0xFFFF) as u16)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute the absolute word position with the field id of the attribute and relative position in the attribute.
|
|
|
|
pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
|
|
|
|
(field_id as u32) << 16 | (relative as u32)
|
|
|
|
}
|
|
|
|
|
2020-11-05 20:34:15 +08:00
|
|
|
/// Transform a raw obkv store into a JSON Object.
|
|
|
|
pub fn obkv_to_json(
|
2020-11-27 00:38:08 +08:00
|
|
|
displayed_fields: &[FieldId],
|
2020-11-05 20:34:15 +08:00
|
|
|
fields_ids_map: &FieldsIdsMap,
|
2021-07-06 17:31:24 +08:00
|
|
|
obkv: obkv::KvReaderU16,
|
2021-06-17 00:33:33 +08:00
|
|
|
) -> Result<Map<String, Value>> {
|
|
|
|
displayed_fields
|
|
|
|
.iter()
|
2020-11-05 20:34:15 +08:00
|
|
|
.copied()
|
|
|
|
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
|
|
|
|
.map(|(id, value)| {
|
2021-06-14 22:46:19 +08:00
|
|
|
let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
|
|
|
|
field_id: id,
|
2021-06-15 17:10:50 +08:00
|
|
|
process: "obkv_to_json",
|
2021-06-14 22:46:19 +08:00
|
|
|
})?;
|
|
|
|
let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
|
2020-11-05 20:34:15 +08:00
|
|
|
Ok((name.to_owned(), value))
|
|
|
|
})
|
|
|
|
.collect()
|
|
|
|
}
|
2020-11-06 23:15:07 +08:00
|
|
|
|
|
|
|
/// Transform a JSON value into a string that can be indexed.
|
2020-11-12 00:33:05 +08:00
|
|
|
pub fn json_to_string(value: &Value) -> Option<String> {
|
|
|
|
fn inner(value: &Value, output: &mut String) -> bool {
|
2020-11-06 23:15:07 +08:00
|
|
|
use std::fmt::Write;
|
|
|
|
match value {
|
|
|
|
Value::Null => false,
|
|
|
|
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
|
|
|
|
Value::Number(number) => write!(output, "{}", number).is_ok(),
|
|
|
|
Value::String(string) => write!(output, "{}", string).is_ok(),
|
|
|
|
Value::Array(array) => {
|
|
|
|
let mut count = 0;
|
|
|
|
for value in array {
|
|
|
|
if inner(value, output) {
|
|
|
|
output.push_str(". ");
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// check that at least one value was written
|
|
|
|
count != 0
|
2021-06-17 00:33:33 +08:00
|
|
|
}
|
2020-11-06 23:15:07 +08:00
|
|
|
Value::Object(object) => {
|
|
|
|
let mut buffer = String::new();
|
|
|
|
let mut count = 0;
|
|
|
|
for (key, value) in object {
|
|
|
|
buffer.clear();
|
|
|
|
let _ = write!(&mut buffer, "{}: ", key);
|
|
|
|
if inner(value, &mut buffer) {
|
|
|
|
buffer.push_str(". ");
|
|
|
|
// We write the "key: value. " pair only when
|
|
|
|
// we are sure that the value can be written.
|
|
|
|
output.push_str(&buffer);
|
|
|
|
count += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// check that at least one value was written
|
|
|
|
count != 0
|
2021-06-17 00:33:33 +08:00
|
|
|
}
|
2020-11-06 23:15:07 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut string = String::new();
|
|
|
|
if inner(value, &mut string) {
|
|
|
|
Some(string)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-06 17:31:24 +08:00
|
|
|
/// Splits `slice` in two at `mid`: the first half holds the elements before `mid` and the
/// second half the elements from `mid` onwards.
///
/// Returns `None` when `mid` is greater than the length of the slice.
fn try_split_at<T>(slice: &[T], mid: usize) -> Option<(&[T], &[T])> {
    // The closure only runs once the bounds check passed, so `split_at` cannot panic.
    (mid <= slice.len()).then(|| slice.split_at(mid))
}
|
|
|
|
|
|
|
|
/// Divides one slice into an array and the tail at an index,
|
|
|
|
/// returns `None` if `N` is out of bounds.
|
|
|
|
fn try_split_array_at<T, const N: usize>(slice: &[T]) -> Option<([T; N], &[T])>
|
|
|
|
where
|
|
|
|
[T; N]: for<'a> TryFrom<&'a [T]>,
|
|
|
|
{
|
|
|
|
let (head, tail) = try_split_at(slice, N)?;
|
|
|
|
let head = head.try_into().ok()?;
|
|
|
|
Some((head, tail))
|
|
|
|
}
|
|
|
|
|
2021-09-09 21:19:47 +08:00
|
|
|
/// Return the distance between two points in meters. Each points are composed of two f64,
|
|
|
|
/// one latitude and one longitude.
|
|
|
|
pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
|
2021-09-10 00:11:38 +08:00
|
|
|
let a = geoutils::Location::new(a[0], a[1]);
|
|
|
|
let b = geoutils::Location::new(b[0], b[1]);
|
2021-09-07 18:11:03 +08:00
|
|
|
|
2021-09-10 00:11:38 +08:00
|
|
|
a.haversine_distance_to(&b).meters()
|
2021-09-07 18:11:03 +08:00
|
|
|
}
|
|
|
|
|
2020-11-06 23:15:07 +08:00
|
|
|
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Pairs of an absolute position and the `(field id, relative position)` it maps to.
    const POSITION_CASES: [(Position, (FieldId, RelativePosition)); 7] = [
        (0x00000000, (0x0000, 0x0000)),
        (0x0000FFFF, (0x0000, 0xFFFF)),
        (0xFFFF0000, (0xFFFF, 0x0000)),
        (0xFF00FF00, (0xFF00, 0xFF00)),
        (0xFF0000FF, (0xFF00, 0x00FF)),
        (0x12345678, (0x1234, 0x5678)),
        (0xFFFFFFFF, (0xFFFF, 0xFFFF)),
    ];

    #[test]
    fn json_to_string_object() {
        let document = json!({
            "name": "John Doe",
            "age": 43,
            "not_there": null,
        });

        let rendered = json_to_string(&document).unwrap();
        assert_eq!(rendered, "name: John Doe. age: 43. ");
    }

    #[test]
    fn json_to_string_array() {
        let document = json!([
            { "name": "John Doe" },
            43,
            "hello",
            [ "I", "am", "fine" ],
            null,
        ]);

        let rendered = json_to_string(&document).unwrap();
        // Two consecutive points (.) are not an issue, as the distance
        // of hard separators is clamped to 8 anyway.
        assert_eq!(rendered, "name: John Doe. . 43. hello. I. am. fine. . ");
    }

    #[test]
    fn test_relative_position_conversion() {
        for &(absolute, expected) in POSITION_CASES.iter() {
            assert_eq!(expected, relative_from_absolute_position(absolute));
        }
    }

    #[test]
    fn test_absolute_position_conversion() {
        for &(expected, (field_id, relative)) in POSITION_CASES.iter() {
            assert_eq!(expected, absolute_from_relative_position(field_id, relative));
        }
    }
}
|