meilisearch/milli/src/documents/serde_impl.rs

135 lines
3.4 KiB
Rust
Raw Normal View History

2021-10-11 20:58:40 +08:00
use std::collections::BTreeMap;
2021-10-21 03:26:52 +08:00
use std::fmt;
2021-10-25 23:38:32 +08:00
use std::io::{Cursor, Write};
2021-10-21 03:26:52 +08:00
use byteorder::WriteBytesExt;
2021-10-25 23:38:32 +08:00
use serde::de::{DeserializeSeed, MapAccess, SeqAccess, Visitor};
2021-10-21 03:26:52 +08:00
use serde::Deserialize;
2021-10-11 20:58:40 +08:00
use serde_json::Value;
2021-10-25 23:38:32 +08:00
use super::{ByteCounter, DocumentsBatchIndex, Error};
use crate::FieldId;
2021-10-24 21:39:56 +08:00
/// Unwraps a `Result` inside a function whose return type is a nested
/// `Result<Result<_, _>, _>`.
///
/// On `Ok` the macro evaluates to the contained value. On `Err` it makes the
/// enclosing function return `Ok(Err(..))`, converting the error with `Into`
/// so that it lands in the *inner* result rather than the outer one.
macro_rules! tri {
    ($result:expr) => {
        match $result {
            Ok(value) => value,
            Err(error) => return Ok(Err(error.into())),
        }
    };
}
2021-10-21 03:26:52 +08:00
/// Deserialization seed that turns a string key into a `FieldId`.
///
/// It wraps a mutable borrow of the batch index; the visited string is passed
/// to the index's `insert` method and the id it returns becomes the seed's
/// value.
struct FieldIdResolver<'a>(&'a mut DocumentsBatchIndex);
2021-10-21 03:26:52 +08:00
impl<'a, 'de> DeserializeSeed<'de> for FieldIdResolver<'a> {
type Value = FieldId;
2021-10-21 03:26:52 +08:00
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
2021-10-25 23:38:32 +08:00
D: serde::Deserializer<'de>,
{
deserializer.deserialize_str(self)
}
2021-10-21 03:26:52 +08:00
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de> Visitor<'de> for FieldIdResolver<'a> {
type Value = FieldId;
2021-10-21 03:26:52 +08:00
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
2021-10-21 17:05:16 +08:00
E: serde::de::Error,
{
2021-10-25 23:38:32 +08:00
Ok(self.0.insert(v))
}
2021-10-24 21:39:56 +08:00
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a string")
}
}
2021-10-21 03:26:52 +08:00
/// Stateless deserialization seed whose value is a `serde_json::Value`.
struct ValueDeserializer;
2021-10-21 03:26:52 +08:00
impl<'de> DeserializeSeed<'de> for ValueDeserializer {
type Value = serde_json::Value;
2021-10-21 03:26:52 +08:00
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
2021-10-25 23:38:32 +08:00
D: serde::Deserializer<'de>,
{
serde_json::Value::deserialize(deserializer)
}
}
2021-10-21 03:26:52 +08:00
/// Visitor state used to turn deserialized documents into length-prefixed
/// obkv records written to `inner`.
///
/// Every field is a mutable borrow, so the buffers and counters are owned by
/// the caller and outlive the visitor.
pub struct DocumentVisitor<'a, W> {
/// Destination writer: each document is written as a big-endian `u32`
/// length followed by its obkv bytes.
pub inner: &'a mut ByteCounter<W>,
/// Index handed to `FieldIdResolver` to map each map key to a `FieldId`.
pub index: &'a mut DocumentsBatchIndex,
/// Scratch buffer backing the obkv writer; cleared before each document is
/// encoded.
pub obkv_buffer: &'a mut Vec<u8>,
/// Scratch buffer receiving the JSON encoding of one field value; cleared
/// before each value is serialized.
pub value_buffer: &'a mut Vec<u8>,
/// Fields of the document currently being visited, keyed by `FieldId`;
/// cleared once the document has been written.
pub values: &'a mut BTreeMap<FieldId, Value>,
/// Incremented by one for every document successfully written.
pub count: &'a mut usize,
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
/// This Visitor value is nothing, since it write the value to a file.
2021-10-24 21:39:56 +08:00
type Value = Result<(), Error>;
2021-10-21 03:26:52 +08:00
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
2021-10-21 03:26:52 +08:00
A: SeqAccess<'de>,
{
2021-10-25 23:38:32 +08:00
while let Some(v) = seq.next_element_seed(&mut *self)? {
tri!(v)
}
2021-10-24 21:39:56 +08:00
Ok(Ok(()))
}
2021-10-21 03:26:52 +08:00
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
2021-10-21 03:26:52 +08:00
A: MapAccess<'de>,
{
2021-10-25 23:38:32 +08:00
while let Some((key, value)) =
map.next_entry_seed(FieldIdResolver(&mut *self.index), ValueDeserializer)?
{
2021-10-21 03:26:52 +08:00
self.values.insert(key, value);
}
2021-10-21 03:26:52 +08:00
self.obkv_buffer.clear();
let mut obkv = obkv::KvWriter::new(Cursor::new(&mut *self.obkv_buffer));
for (key, value) in self.values.iter() {
self.value_buffer.clear();
// This is guaranteed to work
2021-10-24 21:39:56 +08:00
tri!(serde_json::to_writer(Cursor::new(&mut *self.value_buffer), value));
tri!(obkv.insert(*key, &self.value_buffer));
2021-10-21 03:26:52 +08:00
}
2021-10-24 21:39:56 +08:00
let reader = tri!(obkv.into_inner()).into_inner();
2021-10-24 21:39:56 +08:00
tri!(self.inner.write_u32::<byteorder::BigEndian>(reader.len() as u32));
tri!(self.inner.write_all(reader));
2021-10-21 03:26:52 +08:00
*self.count += 1;
2021-10-24 20:41:36 +08:00
self.values.clear();
2021-10-24 21:39:56 +08:00
Ok(Ok(()))
}
2021-10-21 03:26:52 +08:00
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a documents, or a sequence of documents.")
}
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de, W> DeserializeSeed<'de> for &mut DocumentVisitor<'a, W>
2021-10-25 23:38:32 +08:00
where
W: Write,
2021-10-21 03:26:52 +08:00
{
2021-10-24 21:39:56 +08:00
type Value = Result<(), Error>;
2021-10-21 03:26:52 +08:00
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
2021-10-25 23:38:32 +08:00
D: serde::Deserializer<'de>,
{
deserializer.deserialize_map(self)
}
}