meilisearch/milli/src/documents/serde.rs

122 lines
3.3 KiB
Rust
Raw Normal View History

2021-10-11 20:58:40 +08:00
use std::collections::BTreeMap;
use std::io::Cursor;
2021-10-21 03:26:52 +08:00
use std::io::Write;
use std::fmt;
use byteorder::WriteBytesExt;
use serde::Deserialize;
use serde::de::DeserializeSeed;
use serde::de::MapAccess;
use serde::de::SeqAccess;
use serde::de::Visitor;
2021-10-11 20:58:40 +08:00
use serde_json::Value;
2021-10-21 03:26:52 +08:00
use super::{ByteCounter, DocumentsBatchIndex};
use crate::FieldId;
2021-10-21 03:26:52 +08:00
/// Deserialization seed and visitor that turns a map key (a field name) into a
/// [`FieldId`] by calling `insert` on the borrowed [`DocumentsBatchIndex`].
struct FieldIdResolver<'a>(&'a mut DocumentsBatchIndex);
2021-10-21 03:26:52 +08:00
impl<'a, 'de> DeserializeSeed<'de> for FieldIdResolver<'a> {
type Value = FieldId;
2021-10-21 03:26:52 +08:00
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
2021-10-21 03:26:52 +08:00
D: serde::Deserializer<'de> {
deserializer.deserialize_str(self)
}
2021-10-21 03:26:52 +08:00
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de> Visitor<'de> for FieldIdResolver<'a> {
type Value = FieldId;
2021-10-21 03:26:52 +08:00
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
2021-10-21 17:05:16 +08:00
E: serde::de::Error,
{
Ok(self.0.insert(v))
}
2021-10-21 03:26:52 +08:00
fn expecting(&self, _formatter: &mut fmt::Formatter) -> fmt::Result {
todo!()
}
}
2021-10-21 03:26:52 +08:00
/// Stateless deserialization seed that produces a `serde_json::Value`.
struct ValueDeserializer;
2021-10-21 03:26:52 +08:00
impl<'de> DeserializeSeed<'de> for ValueDeserializer {
type Value = serde_json::Value;
2021-10-21 03:26:52 +08:00
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
where
2021-10-21 03:26:52 +08:00
D: serde::Deserializer<'de> {
serde_json::Value::deserialize(deserializer)
}
}
2021-10-21 03:26:52 +08:00
/// Visitor state used to stream documents: each visited map is converted to an
/// obkv buffer and written, length-prefixed, to `inner`.
///
/// All fields are borrowed so the caller keeps ownership of the writer, the
/// index and the buffers between documents.
pub struct DocumentVisitor<'a, W> {
/// Destination writer; receives a big-endian `u32` length followed by the obkv bytes.
pub inner: &'a mut ByteCounter<W>,
/// Batch index used to resolve field names into `FieldId`s.
pub index: &'a mut DocumentsBatchIndex,
/// Buffer backing the obkv writer; cleared before each document is encoded.
pub obkv_buffer: &'a mut Vec<u8>,
/// Buffer holding the JSON serialization of one value; cleared before each value.
pub value_buffer: &'a mut Vec<u8>,
/// Fields of the document currently being visited, keyed by `FieldId`; cleared once it is written.
pub values: &'a mut BTreeMap<FieldId, Value>,
/// Incremented by one for every document written.
pub count: &'a mut usize,
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
/// This Visitor value is nothing, since it write the value to a file.
type Value = ();
2021-10-21 03:26:52 +08:00
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
2021-10-21 03:26:52 +08:00
A: SeqAccess<'de>,
{
2021-10-21 03:26:52 +08:00
while let Some(_) = seq.next_element_seed(&mut *self)? { }
Ok(())
}
2021-10-21 03:26:52 +08:00
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
where
2021-10-21 03:26:52 +08:00
A: MapAccess<'de>,
{
2021-10-21 03:26:52 +08:00
while let Some((key, value)) = map.next_entry_seed(FieldIdResolver(&mut *self.index), ValueDeserializer).unwrap() {
self.values.insert(key, value);
}
2021-10-21 03:26:52 +08:00
self.obkv_buffer.clear();
let mut obkv = obkv::KvWriter::new(Cursor::new(&mut *self.obkv_buffer));
for (key, value) in self.values.iter() {
self.value_buffer.clear();
// This is guaranteed to work
serde_json::to_writer(Cursor::new(&mut *self.value_buffer), value).unwrap();
obkv.insert(*key, &self.value_buffer).unwrap();
}
2021-10-21 03:26:52 +08:00
let reader = obkv.into_inner().unwrap().into_inner();
2021-10-21 03:26:52 +08:00
self.inner.write_u32::<byteorder::BigEndian>(reader.len() as u32).unwrap();
self.inner.write_all(reader).unwrap();
2021-10-21 03:26:52 +08:00
*self.count += 1;
2021-10-24 20:41:36 +08:00
self.values.clear();
2021-10-21 03:26:52 +08:00
Ok(())
}
2021-10-21 03:26:52 +08:00
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "a documents, or a sequence of documents.")
}
}
2021-10-21 03:26:52 +08:00
impl<'a, 'de, W> DeserializeSeed<'de> for &mut DocumentVisitor<'a, W>
where
    W: Write,
{
    /// Nothing is returned; the visitor writes the document as a side effect.
    type Value = ();

    /// Requests a map from the deserializer and drives it with this visitor.
    fn deserialize<D>(self, deserializer: D) -> Result<(), D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let visitor = self;
        deserializer.deserialize_map(visitor)
    }
}