diff --git a/Cargo.lock b/Cargo.lock index 8f296f2fa..91e72450a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1520,7 +1520,8 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "pest" version = "2.1.3" -source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" dependencies = [ "ucd-trie", ] @@ -1528,8 +1529,7 @@ dependencies = [ [[package]] name = "pest" version = "2.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +source = "git+https://github.com/pest-parser/pest.git?rev=51fd1d49f1041f7839975664ef71fe15c7dcaf67#51fd1d49f1041f7839975664ef71fe15c7dcaf67" dependencies = [ "ucd-trie", ] diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index f068b5b9a..6e9a07855 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -1,38 +1,38 @@ +use std::{io, mem}; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::fmt::Display; -use std::fs::{File, create_dir_all}; +use std::fs::{create_dir_all, File}; use std::net::SocketAddr; use std::num::NonZeroUsize; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::time::Instant; -use std::{mem, io}; use askama_warp::Template; use byte_unit::Byte; use either::Either; use flate2::read::GzDecoder; -use futures::stream; use futures::{FutureExt, StreamExt}; +use futures::stream; use grenad::CompressionType; use heed::EnvOpenOptions; +use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::OnceCell; use rayon::ThreadPool; -use serde::{Serialize, Deserialize, Deserializer}; +use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use structopt::StructOpt; use tokio::fs::File as TFile; use tokio::io::AsyncWriteExt; use tokio::sync::broadcast; -use warp::filters::ws::Message; use warp::{Filter, http::Response}; -use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; +use warp::filters::ws::Message; +use milli::{FacetCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore}; use milli::facet::FacetValue; +use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat}; use milli::update::UpdateIndexingStep::*; -use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat}; -use milli::{obkv_to_json, Index, UpdateStore, SearchResult, MatchingWords, FacetCondition}; static GLOBAL_THREAD_POOL: OnceCell = OnceCell::new(); @@ -154,17 +154,17 @@ impl<'a, A: AsRef<[u8]>> Highlighter<'a, A> { } } Value::String(string) - }, + } Value::Array(values) => { Value::Array(values.into_iter() .map(|v| self.highlight_value(v, matching_words)) .collect()) - }, + } Value::Object(object) => { Value::Object(object.into_iter() .map(|(k, v)| (k, self.highlight_value(v, matching_words))) .collect()) - }, + } } } @@ -246,36 +246,20 @@ enum UpdateMetaProgress { #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] struct Settings { - #[serde( - default, - deserialize_with = "deserialize_some", - skip_serializing_if = "Option::is_none", - )] - displayed_attributes: Option>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + displayed_attributes: Setting>, - #[serde( - default, - deserialize_with = "deserialize_some", - skip_serializing_if = "Option::is_none", - )] - searchable_attributes: Option>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + searchable_attributes: Setting>, - #[serde(default)] - faceted_attributes: Option>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + faceted_attributes: Setting>, - #[serde( - default, - deserialize_with = "deserialize_some", - skip_serializing_if = "Option::is_none", - )] - criteria: Option>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + criteria: Setting>, - #[serde( - default, - deserialize_with = "deserialize_some", - skip_serializing_if = "Option::is_none", - )] - stop_words: Option>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + stop_words: Setting>, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -294,14 +278,6 @@ struct WordsPrefixes { max_prefix_length: Option, } -// Any value that is present is considered Some value, including null. -fn deserialize_some<'de, T, D>(deserializer: D) -> Result, D::Error> -where T: Deserialize<'de>, - D: Deserializer<'de> -{ - Deserialize::deserialize(deserializer).map(Some) -} - #[tokio::main] async fn main() -> anyhow::Result<()> { let opt = Opt::from_args(); @@ -339,7 +315,7 @@ async fn main() -> anyhow::Result<()> { update_store_options, update_store_path, // the type hint is necessary: https://github.com/rust-lang/rust/issues/32600 - move |update_id, meta, content:&_| { + move |update_id, meta, content: &_| { // We prepare the update by using the update builder. let mut update_builder = UpdateBuilder::new(update_id); if let Some(max_nb_chunks) = indexer_opt_cloned.max_nb_chunks { @@ -396,7 +372,7 @@ async fn main() -> anyhow::Result<()> { total_steps: indexing_step.number_of_steps(), current, total, - } + }, }); }); @@ -404,7 +380,7 @@ async fn main() -> anyhow::Result<()> { Ok(_) => wtxn.commit().map_err(Into::into), Err(e) => Err(e.into()) } - }, + } UpdateMeta::ClearDocuments => { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; @@ -414,47 +390,45 @@ async fn main() -> anyhow::Result<()> { Ok(_count) => wtxn.commit().map_err(Into::into), Err(e) => Err(e.into()) } - }, + } UpdateMeta::Settings(settings) => { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; let mut builder = update_builder.settings(&mut wtxn, &index_cloned); // We transpose the settings JSON struct into a real setting update. - if let Some(names) = settings.searchable_attributes { - match names { - Some(names) => builder.set_searchable_fields(names), - None => builder.reset_searchable_fields(), - } + match settings.searchable_attributes { + Setting::Set(searchable_attributes) => builder.set_searchable_fields(searchable_attributes), + Setting::Reset => builder.reset_searchable_fields(), + Setting::NotSet => () } // We transpose the settings JSON struct into a real setting update. - if let Some(names) = settings.displayed_attributes { - match names { - Some(names) => builder.set_displayed_fields(names), - None => builder.reset_displayed_fields(), - } + match settings.displayed_attributes { + Setting::Set(displayed_attributes) => builder.set_displayed_fields(displayed_attributes), + Setting::Reset => builder.reset_displayed_fields(), + Setting::NotSet => () } // We transpose the settings JSON struct into a real setting update. - if let Some(facet_types) = settings.faceted_attributes { - builder.set_faceted_fields(facet_types); + match settings.faceted_attributes { + Setting::Set(faceted_attributes) => builder.set_faceted_fields(faceted_attributes), + Setting::Reset => builder.reset_faceted_fields(), + Setting::NotSet => () } // We transpose the settings JSON struct into a real setting update. - if let Some(criteria) = settings.criteria { - match criteria { - Some(criteria) => builder.set_criteria(criteria), - None => builder.reset_criteria(), - } + match settings.criteria { + Setting::Set(criteria) => builder.set_criteria(criteria), + Setting::Reset => builder.reset_criteria(), + Setting::NotSet => () } // We transpose the settings JSON struct into a real setting update. - if let Some(stop_words) = settings.stop_words { - match stop_words { - Some(stop_words) => builder.set_stop_words(stop_words), - None => builder.reset_stop_words(), - } + match settings.stop_words { + Setting::Set(stop_words) => builder.set_stop_words(stop_words), + Setting::Reset => builder.reset_stop_words(), + Setting::NotSet => () } let result = builder.execute(|indexing_step, update_id| { @@ -471,7 +445,7 @@ async fn main() -> anyhow::Result<()> { total_steps: indexing_step.number_of_steps(), current, total, - } + }, }); }); @@ -479,7 +453,7 @@ async fn main() -> anyhow::Result<()> { Ok(_count) => wtxn.commit().map_err(Into::into), Err(e) => Err(e.into()) } - }, + } UpdateMeta::Facets(levels) => { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; @@ -494,7 +468,7 @@ async fn main() -> anyhow::Result<()> { Ok(()) => wtxn.commit().map_err(Into::into), Err(e) => Err(e.into()) } - }, + } UpdateMeta::WordsPrefixes(settings) => { // We must use the write transaction of the update here. let mut wtxn = index_cloned.write_txn()?; @@ -716,7 +690,7 @@ async fn main() -> anyhow::Result<()> { let filters = match query.filters { Some(condition) if !condition.trim().is_empty() => { Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap()) - }, + } _otherwise => None, }; @@ -724,14 +698,14 @@ async fn main() -> anyhow::Result<()> { Some(array) => { let eithers = array.into_iter().map(Into::into); FacetCondition::from_array(&rtxn, &index, eithers).unwrap() - }, + } _otherwise => None, }; let condition = match (filters, facet_filters) { (Some(filters), Some(facet_filters)) => { Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters))) - }, + } (Some(condition), None) | (None, Some(condition)) => Some(condition), _otherwise => None, }; @@ -807,12 +781,12 @@ async fn main() -> anyhow::Result<()> { Response::builder() .header("Content-Type", "application/json") .body(serde_json::to_string(&document).unwrap()) - }, + } None => { Response::builder() .status(404) .body(format!("Document with id {:?} not found.", id)) - }, + } } }); @@ -978,11 +952,11 @@ async fn main() -> anyhow::Result<()> { Ok(status) => { let msg = serde_json::to_string(&status).unwrap(); stream::iter(Some(Ok(Message::text(msg)))) - }, + } Err(e) => { eprintln!("channel error: {:?}", e); stream::iter(None) - }, + } } }) .forward(websocket) diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs index fcdcb33e9..c2df94468 100644 --- a/milli/src/update/mod.rs +++ b/milli/src/update/mod.rs @@ -1,3 +1,13 @@ +pub use self::available_documents_ids::AvailableDocumentsIds; +pub use self::clear_documents::ClearDocuments; +pub use self::delete_documents::DeleteDocuments; +pub use self::facets::Facets; +pub use self::index_documents::{DocumentAdditionResult, IndexDocuments, IndexDocumentsMethod, UpdateFormat}; +pub use self::settings::{Setting, Settings}; +pub use self::update_builder::UpdateBuilder; +pub use self::update_step::UpdateIndexingStep; +pub use self::words_prefixes::WordsPrefixes; + mod available_documents_ids; mod clear_documents; mod delete_documents; @@ -8,12 +18,3 @@ mod update_builder; mod update_step; mod words_prefixes; -pub use self::available_documents_ids::AvailableDocumentsIds; -pub use self::clear_documents::ClearDocuments; -pub use self::delete_documents::DeleteDocuments; -pub use self::facets::Facets; -pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat, DocumentAdditionResult}; -pub use self::settings::Settings; -pub use self::update_builder::UpdateBuilder; -pub use self::update_step::UpdateIndexingStep; -pub use self::words_prefixes::WordsPrefixes; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 45a4c204c..f73d0f4d2 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -6,12 +6,51 @@ use chrono::Utc; use grenad::CompressionType; use itertools::Itertools; use rayon::ThreadPool; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use crate::{FieldsIdsMap, Index}; use crate::criterion::Criterion; use crate::facet::FacetType; -use crate::update::index_documents::{Transform, IndexDocumentsMethod}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; -use crate::{Index, FieldsIdsMap}; +use crate::update::index_documents::{IndexDocumentsMethod, Transform}; + +#[derive(Debug, Clone)] +pub enum Setting { + Set(T), + NotSet, + Reset, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } +} + +impl Serialize for Setting { + fn serialize(&self, serializer: S) -> Result where S: Serializer { + match self { + Self::Set(value) => Some(value), + // Usually not_set isn't serialized by setting skip_serializing_if field attribute + Self::NotSet | Self::Reset => None, + }.serialize(serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} pub struct Settings<'a, 't, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -26,13 +65,11 @@ pub struct Settings<'a, 't, 'u, 'i> { pub(crate) thread_pool: Option<&'a ThreadPool>, update_id: u64, - // If a struct field is set to `None` it means that it hasn't been set by the user, - // however if it is `Some(None)` it means that the user forced a reset of the setting. - searchable_fields: Option>>, - displayed_fields: Option>>, - faceted_fields: Option>>, - criteria: Option>>, - stop_words: Option>>, + searchable_fields: Setting>, + displayed_fields: Setting>, + faceted_fields: Setting>, + criteria: Setting>, + stop_words: Setting>, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -52,62 +89,62 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { chunk_compression_level: None, chunk_fusing_shrink_size: None, thread_pool: None, - searchable_fields: None, - displayed_fields: None, - faceted_fields: None, - criteria: None, - stop_words: None, + searchable_fields: Setting::NotSet, + displayed_fields: Setting::NotSet, + faceted_fields: Setting::NotSet, + criteria: Setting::NotSet, + stop_words: Setting::NotSet, update_id, } } pub fn reset_searchable_fields(&mut self) { - self.searchable_fields = Some(None); + self.searchable_fields = Setting::Reset; } pub fn set_searchable_fields(&mut self, names: Vec) { - self.searchable_fields = Some(Some(names)); + self.searchable_fields = Setting::Set(names); } pub fn reset_displayed_fields(&mut self) { - self.displayed_fields = Some(None); + self.displayed_fields = Setting::Reset; } pub fn set_displayed_fields(&mut self, names: Vec) { - self.displayed_fields = Some(Some(names)); - } - - pub fn set_faceted_fields(&mut self, names_facet_types: HashMap) { - self.faceted_fields = Some(Some(names_facet_types)); + self.displayed_fields = Setting::Set(names); } pub fn reset_faceted_fields(&mut self) { - self.faceted_fields = Some(None); + self.faceted_fields = Setting::Reset; + } + + pub fn set_faceted_fields(&mut self, names_facet_types: HashMap) { + self.faceted_fields = Setting::Set(names_facet_types); } pub fn reset_criteria(&mut self) { - self.criteria = Some(None); + self.criteria = Setting::Reset; } pub fn set_criteria(&mut self, criteria: Vec) { - self.criteria = Some(Some(criteria)); + self.criteria = Setting::Set(criteria); } pub fn reset_stop_words(&mut self) { - self.stop_words = Some(None); + self.stop_words = Setting::Reset; } pub fn set_stop_words(&mut self, stop_words: BTreeSet) { self.stop_words = if stop_words.is_empty() { - Some(None) + Setting::Reset } else { - Some(Some(stop_words)) + Setting::Set(stop_words) } } fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> - where - F: Fn(UpdateIndexingStep, u64) + Sync + where + F: Fn(UpdateIndexingStep, u64) + Sync { let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let update_id = self.update_id; @@ -115,7 +152,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { // if the settings are set before any document update, we don't need to do anything, and // will set the primary key during the first document addition. if self.index.number_of_documents(&self.wtxn)? == 0 { - return Ok(()) + return Ok(()); } let transform = Transform { @@ -160,7 +197,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_displayed(&mut self) -> anyhow::Result { match self.displayed_fields { - Some(Some(ref fields)) => { + Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; // fields are deduplicated, only the first occurrence is taken into account let names: Vec<_> = fields @@ -177,8 +214,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.index.put_displayed_fields(self.wtxn, &names)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } - Some(None) => { self.index.delete_displayed_fields(self.wtxn)?; }, - None => return Ok(false), + Setting::Reset => { self.index.delete_displayed_fields(self.wtxn)?; } + Setting::NotSet => return Ok(false), } Ok(true) } @@ -187,7 +224,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { /// reflect the order of the searchable attributes. fn update_searchable(&mut self) -> anyhow::Result { match self.searchable_fields { - Some(Some(ref fields)) => { + Setting::Set(ref fields) => { // every time the searchable attributes are updated, we need to update the // ids for any settings that uses the facets. (displayed_fields, // faceted_fields) @@ -218,15 +255,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.index.put_searchable_fields(self.wtxn, &names)?; self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; } - Some(None) => { self.index.delete_searchable_fields(self.wtxn)?; }, - None => return Ok(false), + Setting::Reset => { self.index.delete_searchable_fields(self.wtxn)?; } + Setting::NotSet => return Ok(false), } Ok(true) } fn update_stop_words(&mut self) -> anyhow::Result { match self.stop_words { - Some(Some(ref stop_words)) => { + Setting::Set(ref stop_words) => { let current = self.index.stop_words(self.wtxn)?; // since we can't compare a BTreeSet with an FST we are going to convert the // BTreeSet to an FST and then compare bytes per bytes the two FSTs. @@ -241,14 +278,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(false) } } - Some(None) => Ok(self.index.delete_stop_words(self.wtxn)?), - None => Ok(false), + Setting::Reset => Ok(self.index.delete_stop_words(self.wtxn)?), + Setting::NotSet => Ok(false), } } fn update_facets(&mut self) -> anyhow::Result { match self.faceted_fields { - Some(Some(ref fields)) => { + Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_facets = HashMap::new(); for (name, ty) in fields { @@ -259,15 +296,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.index.put_faceted_fields(self.wtxn, &new_facets)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } - Some(None) => { self.index.delete_faceted_fields(self.wtxn)?; }, - None => return Ok(false) + Setting::Reset => { self.index.delete_faceted_fields(self.wtxn)?; } + Setting::NotSet => return Ok(false) } Ok(true) } fn update_criteria(&mut self) -> anyhow::Result<()> { match self.criteria { - Some(Some(ref fields)) => { + Setting::Set(ref fields) => { let faceted_fields = self.index.faceted_fields(&self.wtxn)?; let mut new_criteria = Vec::new(); for name in fields { @@ -276,15 +313,15 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } self.index.put_criteria(self.wtxn, &new_criteria)?; } - Some(None) => { self.index.delete_criteria(self.wtxn)?; } - None => (), + Setting::Reset => { self.index.delete_criteria(self.wtxn)?; } + Setting::NotSet => (), } Ok(()) } pub fn execute(mut self, progress_callback: F) -> anyhow::Result<()> - where - F: Fn(UpdateIndexingStep, u64) + Sync + where + F: Fn(UpdateIndexingStep, u64) + Sync { self.index.set_updated_at(self.wtxn, &Utc::now())?; let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; @@ -305,14 +342,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { #[cfg(test)] mod tests { - use super::*; - use heed::EnvOpenOptions; - use maplit::{hashmap, btreeset}; + use maplit::{btreeset, hashmap}; use crate::facet::FacetType; use crate::update::{IndexDocuments, UpdateFormat}; + use super::*; + #[test] fn set_and_reset_searchable_fields() { let path = tempfile::tempdir().unwrap(); @@ -480,7 +517,7 @@ mod tests { // Set the faceted fields to be the age. let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_faceted_fields(hashmap!{ "age".into() => "integer".into() }); + builder.set_faceted_fields(hashmap! { "age".into() => "integer".into() }); builder.execute(|_, _| ()).unwrap(); // Then index some documents. @@ -493,7 +530,7 @@ mod tests { // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); let fields_ids = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashmap!{ "age".to_string() => FacetType::Integer }); + assert_eq!(fields_ids, hashmap! { "age".to_string() => FacetType::Integer }); // Only count the field_id 0 and level 0 facet values. let count = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[0, 0]).unwrap().count(); assert_eq!(count, 3); @@ -550,7 +587,7 @@ mod tests { // In the same transaction we provide some stop_words let mut builder = Settings::new(&mut wtxn, &index, 0); - let set = btreeset!{ "i".to_string(), "the".to_string(), "are".to_string() }; + let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() }; builder.set_stop_words(set.clone()); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -614,7 +651,7 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_displayed_fields(vec!["hello".to_string()]); - builder.set_faceted_fields(hashmap!{ + builder.set_faceted_fields(hashmap! { "age".into() => "integer".into(), "toto".into() => "integer".into(), });