// meilisearch/milli/src/lib.rs
#[macro_use]
extern crate pest_derive;
2020-08-12 16:43:02 +08:00
mod criterion;
mod error;
mod external_documents_ids;
pub mod facet;
2021-06-17 00:33:33 +08:00
mod fields_ids_map;
2020-08-28 20:16:37 +08:00
pub mod heed_codec;
pub mod index;
pub mod proximity;
2021-06-17 00:33:33 +08:00
mod search;
2021-03-19 00:20:16 +08:00
pub mod tree_level;
pub mod update;
2020-06-05 02:25:51 +08:00
use std::borrow::Cow;
2020-08-13 20:15:05 +08:00
use std::collections::HashMap;
2020-05-31 22:09:34 +08:00
use std::hash::BuildHasherDefault;
use std::result::Result as StdResult;
use fxhash::{FxHasher32, FxHasher64};
use serde_json::{Map, Value};
2020-06-05 02:25:51 +08:00
2021-06-17 00:33:33 +08:00
pub use self::criterion::{default_criteria, Criterion};
pub use self::error::{Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fields_ids_map::FieldsIdsMap;
2021-06-17 00:33:33 +08:00
pub use self::heed_codec::{
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec,
};
2020-10-21 21:55:48 +08:00
pub use self::index::Index;
2021-06-17 00:33:33 +08:00
pub use self::search::{FacetDistribution, FilterCondition, MatchingWords, Search, SearchResult};
2021-03-19 00:20:16 +08:00
pub use self::tree_level::TreeLevel;
2020-05-31 22:09:34 +08:00
pub type Result<T> = std::result::Result<T, error::Error>;
2020-05-31 22:09:34 +08:00
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
2020-05-31 22:09:34 +08:00
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
2020-06-11 17:55:03 +08:00
pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
2020-11-13 21:49:48 +08:00
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
2020-05-31 22:09:34 +08:00
pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
2020-10-18 21:16:57 +08:00
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
pub type Attribute = u32;
2020-11-27 00:38:08 +08:00
pub type DocumentId = u32;
pub type FieldId = u8;
pub type Position = u32;
pub type FieldsDistribution = HashMap<String, u64>;
type MergeFn<E> = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Vec<u8>, E>;
/// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json(
2020-11-27 00:38:08 +08:00
displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader,
2021-06-17 00:33:33 +08:00
) -> Result<Map<String, Value>> {
displayed_fields
.iter()
.copied()
.flat_map(|id| obkv.get(id).map(|value| (id, value)))
.map(|(id, value)| {
let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
field_id: id,
process: "obkv_to_json",
})?;
let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
Ok((name.to_owned(), value))
})
.collect()
}
/// Flatten a JSON value into a single string suitable for indexing.
///
/// Scalars are written with their `Display` representation. Every array
/// element and every object entry that produces text is followed by `". "`,
/// and object entries are written as `"key: value. "`. `null` produces
/// nothing, and an array or object in which no element produced text
/// produces nothing either.
///
/// Returns `None` when nothing was written for `value`.
pub fn json_to_string(value: &Value) -> Option<String> {
    /// Appends the textual form of `json` to `out` and reports whether
    /// this value contributed anything.
    fn append(json: &Value, out: &mut String) -> bool {
        use std::fmt::Write;

        match json {
            Value::Null => false,
            Value::Bool(flag) => write!(out, "{}", flag).is_ok(),
            Value::Number(number) => write!(out, "{}", number).is_ok(),
            Value::String(text) => {
                out.push_str(text);
                true
            }
            Value::Array(items) => {
                let mut wrote_any = false;
                for item in items {
                    if append(item, out) {
                        out.push_str(". ");
                        wrote_any = true;
                    }
                }
                wrote_any
            }
            Value::Object(entries) => {
                let mut wrote_any = false;
                // Each entry is staged here first so that the "key: " prefix
                // only reaches `out` when its value actually produced text.
                let mut staged = String::new();
                for (key, item) in entries {
                    staged.clear();
                    staged.push_str(key);
                    staged.push_str(": ");
                    if append(item, &mut staged) {
                        staged.push_str(". ");
                        out.push_str(&staged);
                        wrote_any = true;
                    }
                }
                wrote_any
            }
        }
    }

    let mut flattened = String::new();
    if !append(value, &mut flattened) {
        return None;
    }
    Some(flattened)
}
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Object entries are rendered as "key: value. " and `null` entries are dropped.
    #[test]
    fn json_to_string_object() {
        let document = json!({
            "name": "John Doe",
            "age": 43,
            "not_there": null,
        });

        let flattened = json_to_string(&document);

        assert_eq!(flattened.as_deref(), Some("name: John Doe. age: 43. "));
    }

    /// Array elements are each followed by ". ", including nested containers.
    #[test]
    fn json_to_string_array() {
        let document = json!([
            { "name": "John Doe" },
            43,
            "hello",
            [ "I", "am", "fine" ],
            null,
        ]);

        let flattened = json_to_string(&document);

        // Two consecutive points (.) are acceptable here because the
        // distance of hard separators is clamped to 8 anyway.
        assert_eq!(
            flattened.as_deref(),
            Some("name: John Doe. . 43. hello. I. am. fine. . ")
        );
    }
}