meilisearch/meilisearch-http/src/index/updates.rs

377 lines
12 KiB
Rust
Raw Normal View History

2021-06-15 23:39:07 +08:00
use std::collections::{BTreeMap, BTreeSet, HashSet};
2021-03-04 18:56:32 +08:00
use std::io;
2021-05-10 23:30:09 +08:00
use std::marker::PhantomData;
2021-05-12 23:04:24 +08:00
use std::num::NonZeroUsize;
2021-03-04 18:56:32 +08:00
use flate2::read::GzDecoder;
2021-06-23 16:41:55 +08:00
use log::{debug, info, trace};
2021-08-25 02:55:29 +08:00
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat};
2021-05-12 23:04:24 +08:00
use serde::{Deserialize, Serialize, Serializer};
2021-03-04 18:56:32 +08:00
2021-05-11 02:24:14 +08:00
use crate::index_controller::UpdateResult;
2021-05-11 02:22:18 +08:00
use super::error::Result;
2021-08-25 02:55:29 +08:00
use super::Index;
fn serialize_with_wildcard<S>(
2021-08-25 02:55:29 +08:00
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
2021-05-12 23:04:24 +08:00
where
S: Serializer,
{
let wildcard = vec!["*".to_string()];
2021-08-25 02:55:29 +08:00
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
2021-05-12 23:04:24 +08:00
}
2021-03-04 18:56:32 +08:00
#[derive(Clone, Default, Debug, Serialize)]
2021-05-10 23:30:09 +08:00
pub struct Checked;
2021-08-25 02:55:29 +08:00
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
2021-05-10 23:30:09 +08:00
pub struct Unchecked;
2021-03-04 18:56:32 +08:00
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))]
2021-05-10 23:30:09 +08:00
pub struct Settings<T> {
2021-03-04 18:56:32 +08:00
#[serde(
default,
2021-05-12 23:04:24 +08:00
serialize_with = "serialize_with_wildcard",
2021-08-25 02:55:29 +08:00
skip_serializing_if = "Setting::is_not_set"
2021-03-04 18:56:32 +08:00
)]
2021-08-25 02:55:29 +08:00
pub displayed_attributes: Setting<Vec<String>>,
2021-03-04 18:56:32 +08:00
#[serde(
default,
2021-05-12 23:04:24 +08:00
serialize_with = "serialize_with_wildcard",
2021-08-25 02:55:29 +08:00
skip_serializing_if = "Setting::is_not_set"
2021-06-03 20:19:56 +08:00
)]
2021-08-25 02:55:29 +08:00
pub searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<HashSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<HashSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
2021-08-25 02:55:29 +08:00
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub distinct_attribute: Setting<String>,
2021-05-10 23:30:09 +08:00
#[serde(skip)]
pub _kind: PhantomData<T>,
2021-03-04 18:56:32 +08:00
}
2021-05-10 23:30:09 +08:00
impl Settings<Checked> {
pub fn cleared() -> Settings<Checked> {
Settings {
2021-08-25 02:55:29 +08:00
displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset,
filterable_attributes: Setting::Reset,
sortable_attributes: Setting::Reset,
2021-08-25 02:55:29 +08:00
ranking_rules: Setting::Reset,
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
2021-05-10 23:30:09 +08:00
_kind: PhantomData,
2021-03-04 18:56:32 +08:00
}
}
2021-05-27 20:30:20 +08:00
pub fn into_unchecked(self) -> Settings<Unchecked> {
let Self {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
2021-05-27 20:30:20 +08:00
ranking_rules,
stop_words,
2021-06-03 20:19:56 +08:00
synonyms,
2021-05-27 20:30:20 +08:00
distinct_attribute,
..
} = self;
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
2021-05-27 20:30:20 +08:00
ranking_rules,
stop_words,
2021-06-03 20:19:56 +08:00
synonyms,
2021-05-27 20:30:20 +08:00
distinct_attribute,
_kind: PhantomData,
}
}
2021-03-04 18:56:32 +08:00
}
2021-05-10 23:30:09 +08:00
impl Settings<Unchecked> {
2021-08-25 02:55:29 +08:00
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
2021-05-11 00:22:41 +08:00
if fields.iter().any(|f| f == "*") {
2021-08-25 02:55:29 +08:00
Setting::Reset
2021-05-11 00:22:41 +08:00
} else {
2021-08-25 02:55:29 +08:00
Setting::Set(fields)
2021-05-11 00:22:41 +08:00
}
}
otherwise => otherwise,
};
2021-08-25 02:55:29 +08:00
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
2021-05-11 00:22:41 +08:00
if fields.iter().any(|f| f == "*") {
2021-08-25 02:55:29 +08:00
Setting::Reset
2021-05-11 00:22:41 +08:00
} else {
2021-08-25 02:55:29 +08:00
Setting::Set(fields)
2021-05-11 00:22:41 +08:00
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
2021-05-11 00:22:41 +08:00
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
2021-06-03 20:19:56 +08:00
synonyms: self.synonyms,
2021-05-11 00:22:41 +08:00
distinct_attribute: self.distinct_attribute,
_kind: PhantomData,
}
2021-05-10 23:30:09 +08:00
}
}
2021-03-04 18:56:32 +08:00
/// Facet sizing options.
///
/// Both values are optional and, when present, non-zero. On the wire the
/// fields are named in camelCase and unknown fields are rejected.
// NOTE(review): nothing in the visible part of this file uses this type — confirm it is still needed.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
pub struct Facets {
    pub level_group_size: Option<NonZeroUsize>,
    pub min_level_size: Option<NonZeroUsize>,
}
impl Index {
pub fn update_documents(
&self,
format: UpdateFormat,
method: IndexDocumentsMethod,
2021-04-22 16:14:29 +08:00
content: Option<impl io::Read>,
2021-03-04 18:56:32 +08:00
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
2021-05-12 22:21:37 +08:00
let mut txn = self.write_txn()?;
2021-05-12 23:04:24 +08:00
let result = self.update_documents_txn(
&mut txn,
format,
method,
content,
update_builder,
primary_key,
)?;
2021-05-12 22:21:37 +08:00
txn.commit()?;
Ok(result)
}
pub fn update_documents_txn<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
format: UpdateFormat,
method: IndexDocumentsMethod,
content: Option<impl io::Read>,
update_builder: UpdateBuilder,
primary_key: Option<&str>,
) -> Result<UpdateResult> {
2021-06-23 16:41:55 +08:00
trace!("performing document addition");
2021-03-04 18:56:32 +08:00
// Set the primary key if not set already, ignore if already set.
2021-06-16 23:15:56 +08:00
if let (None, Some(primary_key)) = (self.primary_key(txn)?, primary_key) {
2021-07-30 00:14:36 +08:00
let mut builder = UpdateBuilder::new(0).settings(txn, self);
2021-06-16 23:15:56 +08:00
builder.set_primary_key(primary_key.to_string());
2021-06-17 20:36:32 +08:00
builder.execute(|_, _| ())?;
2021-03-04 18:56:32 +08:00
}
2021-05-12 22:21:37 +08:00
let mut builder = update_builder.index_documents(txn, self);
2021-03-04 18:56:32 +08:00
builder.update_format(format);
builder.index_documents_method(method);
2021-05-31 22:40:59 +08:00
let indexing_callback =
2021-06-23 16:41:55 +08:00
|indexing_step, update_id| debug!("update {}: {:?}", update_id, indexing_step);
2021-05-25 22:33:09 +08:00
2021-03-04 18:56:32 +08:00
let gzipped = false;
2021-05-12 22:21:37 +08:00
let addition = match content {
2021-06-17 20:38:52 +08:00
Some(content) if gzipped => {
builder.execute(GzDecoder::new(content), indexing_callback)?
}
2021-06-17 20:36:32 +08:00
Some(content) => builder.execute(content, indexing_callback)?,
None => builder.execute(std::io::empty(), indexing_callback)?,
2021-03-04 18:56:32 +08:00
};
2021-05-12 22:21:37 +08:00
info!("document addition done: {:?}", addition);
2021-03-04 18:56:32 +08:00
2021-05-12 22:21:37 +08:00
Ok(UpdateResult::DocumentsAddition(addition))
2021-03-04 18:56:32 +08:00
}
pub fn clear_documents(&self, update_builder: UpdateBuilder) -> Result<UpdateResult> {
2021-03-04 18:56:32 +08:00
// We must use the write transaction of the update here.
let mut wtxn = self.write_txn()?;
let builder = update_builder.clear_documents(&mut wtxn, self);
2021-06-17 20:36:32 +08:00
let _count = builder.execute()?;
wtxn.commit()
.and(Ok(UpdateResult::Other))
.map_err(Into::into)
2021-03-04 18:56:32 +08:00
}
2021-05-12 22:21:37 +08:00
pub fn update_settings_txn<'a, 'b>(
&'a self,
txn: &mut heed::RwTxn<'a, 'b>,
2021-05-10 23:30:09 +08:00
settings: &Settings<Checked>,
2021-03-04 18:56:32 +08:00
update_builder: UpdateBuilder,
) -> Result<UpdateResult> {
2021-03-04 18:56:32 +08:00
// We must use the write transaction of the update here.
2021-05-12 22:21:37 +08:00
let mut builder = update_builder.settings(txn, self);
2021-03-04 18:56:32 +08:00
2021-08-25 02:55:29 +08:00
match settings.searchable_attributes {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
2021-03-04 18:56:32 +08:00
}
2021-08-25 02:55:29 +08:00
match settings.displayed_attributes {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
2021-03-04 18:56:32 +08:00
}
2021-08-25 02:55:29 +08:00
match settings.filterable_attributes {
Setting::Set(ref facet_types) => builder.set_filterable_fields(facet_types.clone()),
Setting::Reset => builder.set_filterable_fields(HashSet::new()),
Setting::NotSet => (),
2021-03-04 18:56:32 +08:00
}
match settings.sortable_attributes {
Setting::Set(ref facet_types) => builder.set_sortable_fields(facet_types.clone()),
Setting::Reset => builder.set_sortable_fields(HashSet::new()),
Setting::NotSet => (),
}
2021-08-25 02:55:29 +08:00
match settings.ranking_rules {
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
Setting::Reset => builder.reset_criteria(),
Setting::NotSet => (),
2021-03-04 18:56:32 +08:00
}
2021-08-25 02:55:29 +08:00
match settings.stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
2021-08-25 02:55:29 +08:00
match settings.synonyms {
Setting::Set(ref synonyms) => {
builder.set_synonyms(synonyms.clone().into_iter().collect())
2021-06-03 20:19:56 +08:00
}
2021-08-25 02:55:29 +08:00
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
2021-06-03 20:19:56 +08:00
}
2021-08-25 02:55:29 +08:00
match settings.distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
2021-03-29 15:22:36 +08:00
}
2021-06-17 20:38:52 +08:00
builder.execute(|indexing_step, update_id| {
2021-06-23 16:41:55 +08:00
debug!("update {}: {:?}", update_id, indexing_step)
2021-06-17 20:38:52 +08:00
})?;
2021-03-04 18:56:32 +08:00
2021-05-12 22:21:37 +08:00
Ok(UpdateResult::Other)
}
pub fn update_settings(
&self,
settings: &Settings<Checked>,
update_builder: UpdateBuilder,
) -> Result<UpdateResult> {
2021-05-12 22:21:37 +08:00
let mut txn = self.write_txn()?;
let result = self.update_settings_txn(&mut txn, settings, update_builder)?;
txn.commit()?;
Ok(result)
2021-03-04 18:56:32 +08:00
}
pub fn delete_documents(
&self,
2021-06-10 21:55:44 +08:00
document_ids: &[String],
2021-03-04 18:56:32 +08:00
update_builder: UpdateBuilder,
) -> Result<UpdateResult> {
2021-03-04 18:56:32 +08:00
let mut txn = self.write_txn()?;
2021-06-17 20:36:32 +08:00
let mut builder = update_builder.delete_documents(&mut txn, self)?;
2021-03-04 18:56:32 +08:00
// We ignore unexisting document ids
2021-06-09 23:10:10 +08:00
document_ids.iter().for_each(|id| {
2021-03-16 01:11:10 +08:00
builder.delete_external_id(id);
});
2021-03-04 18:56:32 +08:00
2021-06-17 20:36:32 +08:00
let deleted = builder.execute()?;
txn.commit()
.and(Ok(UpdateResult::DocumentDeletion { deleted }))
.map_err(Into::into)
2021-03-04 18:56:32 +08:00
}
}
2021-05-11 00:34:25 +08:00
#[cfg(test)]
mod test {
    use super::*;

    /// Builds unchecked settings where only the two attribute lists are set.
    fn with_attribute_lists(
        displayed: Vec<String>,
        searchable: Vec<String>,
    ) -> Settings<Unchecked> {
        Settings {
            displayed_attributes: Setting::Set(displayed),
            searchable_attributes: Setting::Set(searchable),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        }
    }

    #[test]
    fn test_setting_check() {
        // Lists without a wildcard must come out of `check` unchanged.
        let plain = with_attribute_lists(
            vec![String::from("hello")],
            vec![String::from("hello")],
        );
        let checked = plain.clone().check();
        assert_eq!(plain.displayed_attributes, checked.displayed_attributes);
        assert_eq!(plain.searchable_attributes, checked.searchable_attributes);

        // A wildcard, alone or next to other names, must become `Reset`.
        let wildcarded = with_attribute_lists(
            vec![String::from("*")],
            vec![String::from("hello"), String::from("*")],
        );
        let checked = wildcarded.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset);
        assert_eq!(checked.searchable_attributes, Setting::Reset);
    }
}