use std::collections::{BTreeSet, HashSet}; use std::ops::Deref; use std::sync::Arc; use anyhow::{bail, Context}; use milli::obkv_to_json; use serde_json::{Map, Value}; use walkdir::WalkDir; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, UpdateResult}; mod search; mod updates; pub type Document = Map; #[derive(Clone)] pub struct Index(pub Arc); impl Deref for Index { type Target = milli::Index; fn deref(&self) -> &Self::Target { self.0.as_ref() } } impl Index { pub fn settings(&self) -> anyhow::Result { let txn = self.read_txn()?; let displayed_attributes = self .displayed_fields(&txn)? .map(|fields| fields.into_iter().map(String::from).collect()) .unwrap_or_else(|| vec!["*".to_string()]); let searchable_attributes = self .searchable_fields(&txn)? .map(|fields| fields.into_iter().map(String::from).collect()) .unwrap_or_else(|| vec!["*".to_string()]); let faceted_attributes = self .faceted_fields(&txn)? .into_iter() .map(|(k, v)| (k, v.to_string())) .collect(); let criteria = self .criteria(&txn)? .into_iter() .map(|c| c.to_string()) .collect(); let stop_words = self .stop_words(&txn)? .map(|stop_words| -> anyhow::Result> { Ok(stop_words .stream() .into_strs()? .into_iter() .collect()) }) .transpose()? .unwrap_or_else(BTreeSet::new); Ok(Settings { displayed_attributes: Some(Some(displayed_attributes)), searchable_attributes: Some(Some(searchable_attributes)), attributes_for_faceting: Some(Some(faceted_attributes)), ranking_rules: Some(Some(criteria)), stop_words: Some(Some(stop_words)), }) } pub fn retrieve_documents>( &self, offset: usize, limit: usize, attributes_to_retrieve: Option>, ) -> anyhow::Result>> { let txn = self.read_txn()?; let fields_ids_map = self.fields_ids_map(&txn)?; let fields_to_display = self.fields_to_display(&txn, attributes_to_retrieve, &fields_ids_map)?; let iter = self.documents.range(&txn, &(..))?.skip(offset).take(limit); let mut documents = Vec::new(); for entry in iter { let (_id, obkv) = entry?; let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?; documents.push(object); } Ok(documents) } pub fn retrieve_document>( &self, doc_id: String, attributes_to_retrieve: Option>, ) -> anyhow::Result> { let txn = self.read_txn()?; let fields_ids_map = self.fields_ids_map(&txn)?; let fields_to_display = self.fields_to_display(&txn, attributes_to_retrieve, &fields_ids_map)?; let internal_id = self .external_documents_ids(&txn)? .get(doc_id.as_bytes()) .with_context(|| format!("Document with id {} not found", doc_id))?; let document = self .documents(&txn, std::iter::once(internal_id))? .into_iter() .next() .map(|(_, d)| d); match document { Some(document) => Ok(obkv_to_json(&fields_to_display, &fields_ids_map, document)?), None => bail!("Document with id {} not found", doc_id), } } pub fn size(&self) -> anyhow::Result { Ok(WalkDir::new(self.env.path()) .into_iter() .filter_map(|entry| entry.ok()) .filter_map(|entry| entry.metadata().ok()) .filter(|metadata| metadata.is_file()) .fold(0, |acc, m| acc + m.len())) } fn fields_to_display>( &self, txn: &heed::RoTxn, attributes_to_retrieve: Option>, fields_ids_map: &milli::FieldsIdsMap, ) -> anyhow::Result> { let mut displayed_fields_ids = match self.displayed_fields_ids(&txn)? { Some(ids) => ids.into_iter().collect::>(), None => fields_ids_map.iter().map(|(id, _)| id).collect(), }; let attributes_to_retrieve_ids = match attributes_to_retrieve { Some(attrs) => attrs .iter() .filter_map(|f| fields_ids_map.id(f.as_ref())) .collect::>(), None => fields_ids_map.iter().map(|(id, _)| id).collect(), }; displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid)); Ok(displayed_fields_ids) } }