From f5b0eb044a48189b639869a12f4cb92f2300075e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 29 Aug 2019 11:37:22 +0200 Subject: [PATCH] fix: Transform the identifier value into a string before hashing it --- meilidb-data/Cargo.toml | 4 +-- meilidb-data/src/serde/extract_document_id.rs | 32 +++++++++++++------ meilidb-data/src/serde/mod.rs | 15 ++++++++- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml index d50605ef7..e3e998cdf 100644 --- a/meilidb-data/Cargo.toml +++ b/meilidb-data/Cargo.toml @@ -15,7 +15,8 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" } ordered-float = { version = "1.0.2", features = ["serde"] } rocksdb = { version = "0.12.2", default-features = false } sdset = "0.3.2" -serde = { version = "1.0.91", features = ["derive"] } +serde = { version = "1.0.99", features = ["derive"] } +serde_json = "1.0.40" siphasher = "0.3.0" zerocopy = "0.2.2" @@ -29,4 +30,3 @@ branch = "arc-byte-slice" [dev-dependencies] tempfile = "3.0.7" -serde_json = "1.0.39" diff --git a/meilidb-data/src/serde/extract_document_id.rs b/meilidb-data/src/serde/extract_document_id.rs index 3ec2c200b..27c99b03e 100644 --- a/meilidb-data/src/serde/extract_document_id.rs +++ b/meilidb-data/src/serde/extract_document_id.rs @@ -2,6 +2,7 @@ use std::hash::{Hash, Hasher}; use meilidb_core::DocumentId; use serde::{ser, Serialize}; +use serde_json::Value; use siphasher::sip::SipHasher; use super::{SerializerError, ConvertToString}; @@ -16,7 +17,18 @@ where D: serde::Serialize, document.serialize(serializer) } -pub fn compute_document_id(t: &T) -> DocumentId { +pub fn value_to_string(value: &Value) -> Option { + match value { + Value::Null => None, + Value::Bool(_) => None, + Value::Number(value) => Some(value.to_string()), + Value::String(value) => Some(value.to_string()), + Value::Array(_) => None, + Value::Object(_) => None, + } +} + +pub fn compute_document_id(t: H) -> DocumentId { let mut s = SipHasher::new(); t.hash(&mut s); let hash = s.finish(); @@ -213,10 +225,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> { let key = key.serialize(ConvertToString)?; if self.identifier == key { - // TODO is it possible to have multiple ids? - let id = bincode::serialize(value).unwrap(); - let document_id = compute_document_id(&id); - self.document_id = Some(document_id); + let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; + match value_to_string(&value).map(|s| compute_document_id(&s)) { + Some(document_id) => self.document_id = Some(document_id), + None => return Err(SerializerError::InvalidDocumentIdType), + } } Ok(()) @@ -244,10 +257,11 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> { where T: Serialize, { if self.identifier == key { - // TODO can it be possible to have multiple ids? - let id = bincode::serialize(value).unwrap(); - let document_id = compute_document_id(&id); - self.document_id = Some(document_id); + let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?; + match value_to_string(&value).map(compute_document_id) { + Some(document_id) => self.document_id = Some(document_id), + None => return Err(SerializerError::InvalidDocumentIdType), + } } Ok(()) diff --git a/meilidb-data/src/serde/mod.rs b/meilidb-data/src/serde/mod.rs index 9ffa619ed..b19d579ef 100644 --- a/meilidb-data/src/serde/mod.rs +++ b/meilidb-data/src/serde/mod.rs @@ -28,6 +28,7 @@ use std::{fmt, error::Error}; use meilidb_core::DocumentId; use meilidb_schema::SchemaAttr; use rmp_serde::encode::Error as RmpError; +use serde_json::Error as SerdeJsonError; use serde::ser; use crate::number::ParseNumberError; @@ -35,7 +36,9 @@ use crate::number::ParseNumberError; #[derive(Debug)] pub enum SerializerError { DocumentIdNotFound, + InvalidDocumentIdType, RmpError(RmpError), + SerdeJsonError(SerdeJsonError), RocksdbError(rocksdb::Error), ParseNumberError(ParseNumberError), UnserializableType { type_name: &'static str }, @@ -55,8 +58,12 @@ impl fmt::Display for SerializerError { match self { SerializerError::DocumentIdNotFound => { write!(f, "serialized document does not have an id according to the schema") - } + }, + SerializerError::InvalidDocumentIdType => { + write!(f, "document identifier can only be of type string or number") + }, SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e), + SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e), SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e), SerializerError::ParseNumberError(e) => { write!(f, "error while trying to parse a number: {}", e) @@ -89,6 +96,12 @@ impl From for SerializerError { } } +impl From for SerializerError { + fn from(error: SerdeJsonError) -> SerializerError { + SerializerError::SerdeJsonError(error) + } +} + impl From for SerializerError { fn from(error: rocksdb::Error) -> SerializerError { SerializerError::RocksdbError(error)