2021-05-10 23:30:09 +08:00
|
|
|
use std::{collections::{BTreeSet, HashSet}, marker::PhantomData};
|
2021-03-04 18:56:32 +08:00
|
|
|
use std::ops::Deref;
|
2021-03-15 17:17:41 +08:00
|
|
|
use std::sync::Arc;
|
2021-03-04 18:56:32 +08:00
|
|
|
|
2021-03-04 22:09:00 +08:00
|
|
|
use anyhow::{bail, Context};
|
2021-03-04 21:20:19 +08:00
|
|
|
use milli::obkv_to_json;
|
2021-03-15 17:17:41 +08:00
|
|
|
use serde_json::{Map, Value};
|
2021-03-04 21:20:19 +08:00
|
|
|
|
2021-04-14 23:53:12 +08:00
|
|
|
use crate::helpers::EnvSizer;
|
2021-03-04 18:56:32 +08:00
|
|
|
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
|
2021-05-11 02:48:06 +08:00
|
|
|
pub use updates::{Facets, Settings, Checked, Unchecked};
|
2021-05-11 02:22:18 +08:00
|
|
|
use serde::{de::Deserializer, Deserialize};
|
2021-03-04 18:56:32 +08:00
|
|
|
|
2021-04-01 22:44:42 +08:00
|
|
|
mod search;
|
|
|
|
mod updates;
|
|
|
|
|
2021-03-04 21:20:19 +08:00
|
|
|
/// A document represented as a JSON object: `String` keys mapped to JSON [`Value`]s.
pub type Document = Map<String, Value>;
|
|
|
|
|
2021-03-04 18:56:32 +08:00
|
|
|
#[derive(Clone)]
|
|
|
|
pub struct Index(pub Arc<milli::Index>);
|
|
|
|
|
|
|
|
impl Deref for Index {
|
|
|
|
type Target = milli::Index;
|
|
|
|
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
self.0.as_ref()
|
|
|
|
}
|
|
|
|
}
|
2021-03-04 19:38:55 +08:00
|
|
|
|
2021-05-11 02:22:18 +08:00
|
|
|
pub fn deserialize_some<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
|
|
|
|
where
|
|
|
|
T: Deserialize<'de>,
|
|
|
|
D: Deserializer<'de>,
|
|
|
|
{
|
|
|
|
Deserialize::deserialize(deserializer).map(Some)
|
|
|
|
}
|
|
|
|
|
2021-03-04 19:38:55 +08:00
|
|
|
impl Index {
|
2021-05-10 23:30:09 +08:00
|
|
|
pub fn settings(&self) -> anyhow::Result<Settings<Checked>> {
|
2021-03-04 19:38:55 +08:00
|
|
|
let txn = self.read_txn()?;
|
|
|
|
|
|
|
|
let displayed_attributes = self
|
|
|
|
.displayed_fields(&txn)?
|
|
|
|
.map(|fields| fields.into_iter().map(String::from).collect())
|
|
|
|
.unwrap_or_else(|| vec!["*".to_string()]);
|
|
|
|
|
|
|
|
let searchable_attributes = self
|
|
|
|
.searchable_fields(&txn)?
|
|
|
|
.map(|fields| fields.into_iter().map(String::from).collect())
|
|
|
|
.unwrap_or_else(|| vec!["*".to_string()]);
|
|
|
|
|
|
|
|
let faceted_attributes = self
|
|
|
|
.faceted_fields(&txn)?
|
|
|
|
.into_iter()
|
|
|
|
.map(|(k, v)| (k, v.to_string()))
|
|
|
|
.collect();
|
|
|
|
|
2021-03-12 05:39:16 +08:00
|
|
|
let criteria = self
|
|
|
|
.criteria(&txn)?
|
|
|
|
.into_iter()
|
|
|
|
.map(|c| c.to_string())
|
|
|
|
.collect();
|
|
|
|
|
2021-04-06 21:41:03 +08:00
|
|
|
let stop_words = self
|
|
|
|
.stop_words(&txn)?
|
|
|
|
.map(|stop_words| -> anyhow::Result<BTreeSet<_>> {
|
2021-04-09 20:41:24 +08:00
|
|
|
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
2021-04-06 21:41:03 +08:00
|
|
|
})
|
|
|
|
.transpose()?
|
|
|
|
.unwrap_or_else(BTreeSet::new);
|
2021-04-22 16:14:29 +08:00
|
|
|
let distinct_attribute = self.distinct_attribute(&txn)?.map(String::from);
|
2021-04-06 21:41:03 +08:00
|
|
|
|
2021-03-04 19:38:55 +08:00
|
|
|
Ok(Settings {
|
|
|
|
displayed_attributes: Some(Some(displayed_attributes)),
|
|
|
|
searchable_attributes: Some(Some(searchable_attributes)),
|
2021-03-15 20:53:50 +08:00
|
|
|
attributes_for_faceting: Some(Some(faceted_attributes)),
|
2021-03-12 05:39:16 +08:00
|
|
|
ranking_rules: Some(Some(criteria)),
|
2021-04-06 21:41:03 +08:00
|
|
|
stop_words: Some(Some(stop_words)),
|
2021-03-29 15:22:36 +08:00
|
|
|
distinct_attribute: Some(distinct_attribute),
|
2021-05-10 23:30:09 +08:00
|
|
|
_kind: PhantomData,
|
2021-03-04 19:38:55 +08:00
|
|
|
})
|
|
|
|
}
|
2021-03-04 21:20:19 +08:00
|
|
|
|
2021-03-16 01:11:10 +08:00
|
|
|
pub fn retrieve_documents<S: AsRef<str>>(
|
2021-03-04 21:20:19 +08:00
|
|
|
&self,
|
|
|
|
offset: usize,
|
|
|
|
limit: usize,
|
|
|
|
attributes_to_retrieve: Option<Vec<S>>,
|
2021-03-16 01:11:10 +08:00
|
|
|
) -> anyhow::Result<Vec<Map<String, Value>>> {
|
2021-03-04 21:20:19 +08:00
|
|
|
let txn = self.read_txn()?;
|
|
|
|
|
|
|
|
let fields_ids_map = self.fields_ids_map(&txn)?;
|
2021-03-16 01:11:10 +08:00
|
|
|
let fields_to_display =
|
2021-04-19 22:22:41 +08:00
|
|
|
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
|
2021-03-04 21:20:19 +08:00
|
|
|
|
|
|
|
let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit);
|
|
|
|
|
|
|
|
let mut documents = Vec::new();
|
|
|
|
|
2021-05-11 02:22:18 +08:00
|
|
|
println!("fields to display: {:?}", fields_to_display);
|
|
|
|
|
2021-03-04 21:20:19 +08:00
|
|
|
for entry in iter {
|
|
|
|
let (_id, obkv) = entry?;
|
2021-03-15 18:01:14 +08:00
|
|
|
let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?;
|
2021-03-04 21:20:19 +08:00
|
|
|
documents.push(object);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(documents)
|
|
|
|
}
|
2021-03-04 22:09:00 +08:00
|
|
|
|
|
|
|
pub fn retrieve_document<S: AsRef<str>>(
|
|
|
|
&self,
|
|
|
|
doc_id: String,
|
|
|
|
attributes_to_retrieve: Option<Vec<S>>,
|
|
|
|
) -> anyhow::Result<Map<String, Value>> {
|
2021-03-15 17:17:41 +08:00
|
|
|
let txn = self.read_txn()?;
|
2021-03-04 22:09:00 +08:00
|
|
|
|
2021-03-15 17:17:41 +08:00
|
|
|
let fields_ids_map = self.fields_ids_map(&txn)?;
|
2021-03-04 22:09:00 +08:00
|
|
|
|
2021-03-16 01:11:10 +08:00
|
|
|
let fields_to_display =
|
2021-04-19 22:22:41 +08:00
|
|
|
self.fields_to_display(&txn, &attributes_to_retrieve, &fields_ids_map)?;
|
2021-03-15 17:36:12 +08:00
|
|
|
|
2021-03-15 17:17:41 +08:00
|
|
|
let internal_id = self
|
|
|
|
.external_documents_ids(&txn)?
|
|
|
|
.get(doc_id.as_bytes())
|
|
|
|
.with_context(|| format!("Document with id {} not found", doc_id))?;
|
|
|
|
|
|
|
|
let document = self
|
|
|
|
.documents(&txn, std::iter::once(internal_id))?
|
|
|
|
.into_iter()
|
|
|
|
.next()
|
|
|
|
.map(|(_, d)| d);
|
|
|
|
|
|
|
|
match document {
|
2021-03-16 01:11:10 +08:00
|
|
|
Some(document) => Ok(obkv_to_json(&fields_to_display, &fields_ids_map, document)?),
|
2021-03-15 17:17:41 +08:00
|
|
|
None => bail!("Document with id {} not found", doc_id),
|
|
|
|
}
|
2021-03-04 22:09:00 +08:00
|
|
|
}
|
2021-03-15 18:01:14 +08:00
|
|
|
|
2021-04-09 20:41:24 +08:00
|
|
|
pub fn size(&self) -> u64 {
|
|
|
|
self.env.size()
|
2021-04-01 22:44:42 +08:00
|
|
|
}
|
|
|
|
|
2021-03-15 18:01:14 +08:00
|
|
|
fn fields_to_display<S: AsRef<str>>(
|
|
|
|
&self,
|
|
|
|
txn: &heed::RoTxn,
|
2021-04-19 22:22:41 +08:00
|
|
|
attributes_to_retrieve: &Option<Vec<S>>,
|
2021-03-15 18:01:14 +08:00
|
|
|
fields_ids_map: &milli::FieldsIdsMap,
|
|
|
|
) -> anyhow::Result<Vec<u8>> {
|
|
|
|
let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)? {
|
|
|
|
Some(ids) => ids.into_iter().collect::<Vec<_>>(),
|
|
|
|
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
|
|
|
};
|
|
|
|
|
|
|
|
let attributes_to_retrieve_ids = match attributes_to_retrieve {
|
|
|
|
Some(attrs) => attrs
|
|
|
|
.iter()
|
|
|
|
.filter_map(|f| fields_ids_map.id(f.as_ref()))
|
|
|
|
.collect::<HashSet<_>>(),
|
|
|
|
None => fields_ids_map.iter().map(|(id, _)| id).collect(),
|
|
|
|
};
|
|
|
|
|
|
|
|
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
|
|
|
|
Ok(displayed_fields_ids)
|
|
|
|
}
|
2021-03-04 19:38:55 +08:00
|
|
|
}
|