From 25b3c9a057ab06f5e3b1207163c77c095a4363c9 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 18 May 2020 13:19:19 +0200 Subject: [PATCH] Remove the serde ExtractDocumentId struct --- meilisearch-core/src/lib.rs | 2 +- .../src/serde/convert_to_string.rs | 279 ---------------- .../src/serde/extract_document_id.rs | 310 ------------------ meilisearch-core/src/serde/mod.rs | 18 +- .../src/update/documents_addition.rs | 44 ++- .../src/update/documents_deletion.rs | 17 - meilisearch-core/src/update/mod.rs | 1 + meilisearch-http/src/routes/document.rs | 11 +- 8 files changed, 47 insertions(+), 635 deletions(-) delete mode 100644 meilisearch-core/src/serde/convert_to_string.rs delete mode 100644 meilisearch-core/src/serde/extract_document_id.rs diff --git a/meilisearch-core/src/lib.rs b/meilisearch-core/src/lib.rs index 179c7746c..9cec35976 100644 --- a/meilisearch-core/src/lib.rs +++ b/meilisearch-core/src/lib.rs @@ -18,7 +18,7 @@ mod query_words_mapper; mod ranked_map; mod raw_document; mod reordered_attrs; -mod update; +pub mod update; pub mod criterion; pub mod facets; pub mod raw_indexer; diff --git a/meilisearch-core/src/serde/convert_to_string.rs b/meilisearch-core/src/serde/convert_to_string.rs deleted file mode 100644 index cd072c904..000000000 --- a/meilisearch-core/src/serde/convert_to_string.rs +++ /dev/null @@ -1,279 +0,0 @@ -use serde::ser; -use serde::Serialize; - -use super::SerializerError; - -pub struct ConvertToString; - -impl ser::Serializer for ConvertToString { - type Ok = String; - type Error = SerializerError; - type SerializeSeq = SeqConvertToString; - type SerializeTuple = ser::Impossible; - type SerializeTupleStruct = ser::Impossible; - type SerializeTupleVariant = ser::Impossible; - type SerializeMap = MapConvertToString; - type SerializeStruct = StructConvertToString; - type SerializeStructVariant = ser::Impossible; - - fn serialize_bool(self, value: bool) -> Result { - Ok(value.to_string()) - } - - fn serialize_char(self, value: char) -> Result { - Ok(value.to_string()) - } - - fn serialize_i8(self, value: i8) -> Result { - Ok(value.to_string()) - } - - fn serialize_i16(self, value: i16) -> Result { - Ok(value.to_string()) - } - - fn serialize_i32(self, value: i32) -> Result { - Ok(value.to_string()) - } - - fn serialize_i64(self, value: i64) -> Result { - Ok(value.to_string()) - } - - fn serialize_u8(self, value: u8) -> Result { - Ok(value.to_string()) - } - - fn serialize_u16(self, value: u16) -> Result { - Ok(value.to_string()) - } - - fn serialize_u32(self, value: u32) -> Result { - Ok(value.to_string()) - } - - fn serialize_u64(self, value: u64) -> Result { - Ok(value.to_string()) - } - - fn serialize_f32(self, value: f32) -> Result { - Ok(value.to_string()) - } - - fn serialize_f64(self, value: f64) -> Result { - Ok(value.to_string()) - } - - fn serialize_str(self, value: &str) -> Result { - Ok(value.to_string()) - } - - fn serialize_bytes(self, _v: &[u8]) -> Result { - Err(SerializerError::UnserializableType { type_name: "&[u8]" }) - } - - fn serialize_none(self) -> Result { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_some(self, _value: &T) -> Result - where - T: Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_unit(self) -> Result { - Ok(String::new()) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit struct", - }) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit variant", - }) - } - - fn serialize_newtype_struct( - self, - _name: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _value: &T, - ) -> Result - where - T: Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "newtype variant", - }) - } - - fn serialize_seq(self, _len: Option) -> Result { - Ok(SeqConvertToString { - text: String::new(), - }) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Err(SerializerError::UnserializableType { type_name: "tuple" }) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple struct", - }) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple variant", - }) - } - - fn serialize_map(self, _len: Option) -> Result { - Ok(MapConvertToString { - text: String::new(), - }) - } - - fn serialize_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(StructConvertToString { - text: String::new(), - }) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "struct variant", - }) - } -} - -pub struct MapConvertToString { - text: String, -} - -impl ser::SerializeMap for MapConvertToString { - type Ok = String; - type Error = SerializerError; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let text = key.serialize(ConvertToString)?; - self.text.push_str(&text); - self.text.push_str(" "); - Ok(()) - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let text = value.serialize(ConvertToString)?; - self.text.push_str(&text); - Ok(()) - } - - fn end(self) -> Result { - Ok(self.text) - } -} - -pub struct StructConvertToString { - text: String, -} - -impl ser::SerializeStruct for StructConvertToString { - type Ok = String; - type Error = SerializerError; - - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let value = value.serialize(ConvertToString)?; - self.text.push_str(key); - self.text.push_str(" "); - self.text.push_str(&value); - Ok(()) - } - - fn end(self) -> Result { - Ok(self.text) - } -} - -pub struct SeqConvertToString { - text: String, -} - -impl ser::SerializeSeq for SeqConvertToString { - type Ok = String; - type Error = SerializerError; - - fn serialize_element(&mut self, key: &T) -> Result<(), Self::Error> - where - T: ser::Serialize, - { - let text = key.serialize(ConvertToString)?; - self.text.push_str(&text); - self.text.push_str(" "); - Ok(()) - } - - fn end(self) -> Result { - Ok(self.text) - } -} diff --git a/meilisearch-core/src/serde/extract_document_id.rs b/meilisearch-core/src/serde/extract_document_id.rs deleted file mode 100644 index c7303a8ec..000000000 --- a/meilisearch-core/src/serde/extract_document_id.rs +++ /dev/null @@ -1,310 +0,0 @@ -use std::hash::{Hash, Hasher}; - -use crate::DocumentId; -use serde::{ser, Serialize}; -use serde_json::{Value, Number}; -use siphasher::sip::SipHasher; - -use super::{ConvertToString, SerializerError}; - -pub fn extract_document_id( - primary_key: &str, - document: &D, -) -> Result, SerializerError> -where - D: serde::Serialize, -{ - let serializer = ExtractDocumentId { primary_key }; - document.serialize(serializer) -} - -fn validate_number(value: &Number) -> Option { - if value.is_f64() { - return None - } - Some(value.to_string()) -} - -fn validate_string(value: &str) -> Option { - if value.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') { - Some(value.to_string()) - } else { - None - } -} - -pub fn value_to_string(value: &Value) -> Option { - match value { - Value::Null => None, - Value::Bool(_) => None, - Value::Number(value) => validate_number(value), - Value::String(value) => validate_string(value), - Value::Array(_) => None, - Value::Object(_) => None, - } -} - -pub fn compute_document_id(t: H) -> DocumentId { - let mut s = SipHasher::new(); - t.hash(&mut s); - let hash = s.finish(); - DocumentId(hash) -} - -struct ExtractDocumentId<'a> { - primary_key: &'a str, -} - -impl<'a> ser::Serializer for ExtractDocumentId<'a> { - type Ok = Option; - type Error = SerializerError; - type SerializeSeq = ser::Impossible; - type SerializeTuple = ser::Impossible; - type SerializeTupleStruct = ser::Impossible; - type SerializeTupleVariant = ser::Impossible; - type SerializeMap = ExtractDocumentIdMapSerializer<'a>; - type SerializeStruct = ExtractDocumentIdStructSerializer<'a>; - type SerializeStructVariant = ser::Impossible; - - forward_to_unserializable_type! { - bool => serialize_bool, - char => serialize_char, - - i8 => serialize_i8, - i16 => serialize_i16, - i32 => serialize_i32, - i64 => serialize_i64, - - u8 => serialize_u8, - u16 => serialize_u16, - u32 => serialize_u32, - u64 => serialize_u64, - - f32 => serialize_f32, - f64 => serialize_f64, - } - - fn serialize_str(self, _value: &str) -> Result { - Err(SerializerError::UnserializableType { type_name: "str" }) - } - - fn serialize_bytes(self, _value: &[u8]) -> Result { - Err(SerializerError::UnserializableType { type_name: "&[u8]" }) - } - - fn serialize_none(self) -> Result { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_some(self, _value: &T) -> Result - where - T: Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "Option", - }) - } - - fn serialize_unit(self) -> Result { - Err(SerializerError::UnserializableType { type_name: "()" }) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit struct", - }) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "unit variant", - }) - } - - fn serialize_newtype_struct( - self, - _name: &'static str, - value: &T, - ) -> Result - where - T: Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _value: &T, - ) -> Result - where - T: Serialize, - { - Err(SerializerError::UnserializableType { - type_name: "newtype variant", - }) - } - - fn serialize_seq(self, _len: Option) -> Result { - Err(SerializerError::UnserializableType { - type_name: "sequence", - }) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Err(SerializerError::UnserializableType { type_name: "tuple" }) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple struct", - }) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "tuple variant", - }) - } - - fn serialize_map(self, _len: Option) -> Result { - let serializer = ExtractDocumentIdMapSerializer { - primary_key: self.primary_key, - document_id: None, - current_key_name: None, - }; - - Ok(serializer) - } - - fn serialize_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - let serializer = ExtractDocumentIdStructSerializer { - primary_key: self.primary_key, - document_id: None, - }; - - Ok(serializer) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - _variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Err(SerializerError::UnserializableType { - type_name: "struct variant", - }) - } -} - -pub struct ExtractDocumentIdMapSerializer<'a> { - primary_key: &'a str, - document_id: Option, - current_key_name: Option, -} - -impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> { - type Ok = Option; - type Error = SerializerError; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - let key = key.serialize(ConvertToString)?; - self.current_key_name = Some(key); - Ok(()) - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: Serialize, - { - let key = self.current_key_name.take().unwrap(); - self.serialize_entry(&key, value) - } - - fn serialize_entry( - &mut self, - key: &K, - value: &V, - ) -> Result<(), Self::Error> - where - K: Serialize, - V: Serialize, - { - let key = key.serialize(ConvertToString)?; - - if self.primary_key == key { - let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; - match value_to_string(&value).map(|s| compute_document_id(&s)) { - Some(document_id) => self.document_id = Some(document_id), - None => return Err(SerializerError::InvalidDocumentIdType), - } - } - - Ok(()) - } - - fn end(self) -> Result { - Ok(self.document_id) - } -} - -pub struct ExtractDocumentIdStructSerializer<'a> { - primary_key: &'a str, - document_id: Option, -} - -impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> { - type Ok = Option; - type Error = SerializerError; - - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: Serialize, - { - if self.primary_key == key { - let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; - match value_to_string(&value).map(compute_document_id) { - Some(document_id) => self.document_id = Some(document_id), - None => return Err(SerializerError::InvalidDocumentIdType), - } - } - - Ok(()) - } - - fn end(self) -> Result { - Ok(self.document_id) - } -} diff --git a/meilisearch-core/src/serde/mod.rs b/meilisearch-core/src/serde/mod.rs index 50e18687e..90b2843c9 100644 --- a/meilisearch-core/src/serde/mod.rs +++ b/meilisearch-core/src/serde/mod.rs @@ -1,20 +1,6 @@ -macro_rules! forward_to_unserializable_type { - ($($ty:ident => $se_method:ident,)*) => { - $( - fn $se_method(self, _v: $ty) -> Result { - Err(SerializerError::UnserializableType { type_name: "$ty" }) - } - )* - } -} - -mod convert_to_string; mod deserializer; -mod extract_document_id; -pub use self::convert_to_string::ConvertToString; pub use self::deserializer::{Deserializer, DeserializerError}; -pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string}; use std::{error::Error, fmt}; @@ -27,7 +13,7 @@ use crate::ParseNumberError; #[derive(Debug)] pub enum SerializerError { DocumentIdNotFound, - InvalidDocumentIdType, + InvalidDocumentIdFormat, Zlmdb(heed::Error), SerdeJson(SerdeJsonError), ParseNumber(ParseNumberError), @@ -50,7 +36,7 @@ impl fmt::Display for SerializerError { SerializerError::DocumentIdNotFound => { f.write_str("serialized document does not have an id according to the schema") } - SerializerError::InvalidDocumentIdType => { + SerializerError::InvalidDocumentIdFormat => { f.write_str("a document primary key can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).") } SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e), diff --git a/meilisearch-core/src/update/documents_addition.rs b/meilisearch-core/src/update/documents_addition.rs index e737e5068..ec99858e2 100644 --- a/meilisearch-core/src/update/documents_addition.rs +++ b/meilisearch-core/src/update/documents_addition.rs @@ -1,11 +1,13 @@ use std::collections::HashMap; use std::fmt::Write as _; +use std::hash::{Hash, Hasher}; use fst::{set::OpBuilder, SetBuilder}; use indexmap::IndexMap; use sdset::{duo::Union, SetOperation}; use serde::Deserialize; use serde_json::Value; +use siphasher::sip::SipHasher; use meilisearch_types::DocumentId; use meilisearch_schema::IndexedPos; @@ -14,7 +16,7 @@ use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; use crate::raw_indexer::RawIndexer; -use crate::serde::{extract_document_id, Deserializer}; +use crate::serde::{Deserializer, SerializerError}; use crate::store; use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update}; use crate::{Error, Number, MResult, RankedMap}; @@ -148,7 +150,7 @@ fn index_value( } // TODO move this helper functions elsewhere -fn value_to_string(value: &Value) -> String { +pub fn value_to_string(value: &Value) -> String { fn internal_value_to_string(string: &mut String, value: &Value) { match value { Value::Null => (), @@ -191,6 +193,39 @@ fn value_to_number(value: &Value) -> Option { } } +// TODO move this helper functions elsewhere +pub fn compute_document_id(t: H) -> DocumentId { + let mut s = SipHasher::new(); + t.hash(&mut s); + let hash = s.finish(); + DocumentId(hash) +} + +// TODO move this helper functions elsewhere +pub fn extract_document_id(primary_key: &str, document: &IndexMap) -> Result { + + fn validate_document_id(string: &str) -> bool { + string.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + } + + match document.get(primary_key) { + Some(value) => { + let string = match value { + Value::Number(number) => number.to_string(), + Value::String(string) => string.clone(), + _ => return Err(SerializerError::InvalidDocumentIdFormat), + }; + + if validate_document_id(&string) { + Ok(compute_document_id(string)) + } else { + Err(SerializerError::InvalidDocumentIdFormat) + } + } + None => Err(SerializerError::DocumentIdNotFound), + } +} + pub fn apply_addition<'a, 'b>( writer: &'a mut heed::RwTxn<'b, MainT>, index: &store::Index, @@ -208,10 +243,7 @@ pub fn apply_addition<'a, 'b>( // 1. store documents ids for future deletion for mut document in addition { - let document_id = match extract_document_id(&primary_key, &document)? { - Some(id) => id, - None => return Err(Error::MissingDocumentId), - }; + let document_id = extract_document_id(&primary_key, &document)?; if partial { let mut deserializer = Deserializer { diff --git a/meilisearch-core/src/update/documents_deletion.rs b/meilisearch-core/src/update/documents_deletion.rs index 30d563efb..4526d053d 100644 --- a/meilisearch-core/src/update/documents_deletion.rs +++ b/meilisearch-core/src/update/documents_deletion.rs @@ -1,13 +1,11 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use fst::{SetBuilder, Streamer}; -use meilisearch_schema::Schema; use sdset::{duo::DifferenceByKey, SetBuf, SetOperation}; use crate::database::{MainT, UpdateT}; use crate::database::{UpdateEvent, UpdateEventsEmitter}; use crate::facets; -use crate::serde::extract_document_id; use crate::store; use crate::update::{next_update_id, compute_short_prefixes, Update}; use crate::{DocumentId, Error, MResult, RankedMap}; @@ -37,21 +35,6 @@ impl DocumentsDeletion { self.documents.push(document_id); } - pub fn delete_document(&mut self, schema: &Schema, document: D) -> MResult<()> - where - D: serde::Serialize, - { - let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?; - let document_id = match extract_document_id(&primary_key, &document)? { - Some(id) => id, - None => return Err(Error::MissingDocumentId), - }; - - self.delete_document_by_id(document_id); - - Ok(()) - } - pub fn finalize(self, writer: &mut heed::RwTxn) -> MResult { let _ = self.updates_notifier.send(UpdateEvent::NewUpdate); let update_id = push_documents_deletion( diff --git a/meilisearch-core/src/update/mod.rs b/meilisearch-core/src/update/mod.rs index d9a2d41a7..6599c3f99 100644 --- a/meilisearch-core/src/update/mod.rs +++ b/meilisearch-core/src/update/mod.rs @@ -8,6 +8,7 @@ pub use self::clear_all::{apply_clear_all, push_clear_all}; pub use self::customs_update::{apply_customs_update, push_customs_update}; pub use self::documents_addition::{ apply_documents_addition, apply_documents_partial_addition, DocumentsAddition, + value_to_string, compute_document_id, extract_document_id, }; pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion}; pub use self::settings_update::{apply_settings_update, push_settings_update}; diff --git a/meilisearch-http/src/routes/document.rs b/meilisearch-http/src/routes/document.rs index ad90df8ca..4ef4027dc 100644 --- a/meilisearch-http/src/routes/document.rs +++ b/meilisearch-http/src/routes/document.rs @@ -42,7 +42,7 @@ async fn get_document( .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = meilisearch_core::serde::compute_document_id(&path.document_id); + let document_id = meilisearch_core::update::compute_document_id(&path.document_id); let reader = data.db.main_read_txn()?; @@ -65,7 +65,7 @@ async fn delete_document( .db .open_index(&path.index_uid) .ok_or(ResponseError::index_not_found(&path.index_uid))?; - let document_id = meilisearch_core::serde::compute_document_id(&path.document_id); + let document_id = meilisearch_core::update::compute_document_id(&path.document_id); let mut update_writer = data.db.update_write_txn()?; @@ -237,10 +237,9 @@ async fn delete_documents( let mut documents_deletion = index.documents_deletion(); for document_id in body.into_inner() { - if let Some(document_id) = meilisearch_core::serde::value_to_string(&document_id) { - documents_deletion - .delete_document_by_id(meilisearch_core::serde::compute_document_id(document_id)); - } + let document_id_string = meilisearch_core::update::value_to_string(&document_id); + let document_id = meilisearch_core::update::compute_document_id(document_id_string); + documents_deletion.delete_document_by_id(document_id); } let update_id = documents_deletion.finalize(&mut writer)?;