2021-10-11 20:58:40 +08:00
|
|
|
use std::collections::BTreeMap;
|
|
|
|
use std::io::Cursor;
|
2021-10-21 03:26:52 +08:00
|
|
|
use std::io::Write;
|
|
|
|
use std::fmt;
|
|
|
|
|
|
|
|
use byteorder::WriteBytesExt;
|
|
|
|
use serde::Deserialize;
|
|
|
|
use serde::de::DeserializeSeed;
|
|
|
|
use serde::de::MapAccess;
|
|
|
|
use serde::de::SeqAccess;
|
|
|
|
use serde::de::Visitor;
|
2021-10-11 20:58:40 +08:00
|
|
|
use serde_json::Value;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
use super::{ByteCounter, DocumentsBatchIndex};
|
2021-08-31 17:44:15 +08:00
|
|
|
use crate::FieldId;
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
/// Wraps a mutable borrow of the batch's [`DocumentsBatchIndex`].
///
/// In this file it is used both as a serde seed and as a serde visitor for
/// map keys: a key string is looked up in the index and turned into a
/// [`FieldId`], registering the name first when it is not known yet.
struct FieldIdResolver<'a>(&'a mut DocumentsBatchIndex);
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
impl<'a, 'de> DeserializeSeed<'de> for FieldIdResolver<'a> {
|
|
|
|
type Value = FieldId;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
2021-08-31 17:44:15 +08:00
|
|
|
where
|
2021-10-21 03:26:52 +08:00
|
|
|
D: serde::Deserializer<'de> {
|
|
|
|
deserializer.deserialize_str(self)
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
2021-10-21 03:26:52 +08:00
|
|
|
}
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
impl<'a, 'de> Visitor<'de> for FieldIdResolver<'a> {
|
|
|
|
type Value = FieldId;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
2021-08-31 17:44:15 +08:00
|
|
|
where
|
2021-10-21 03:26:52 +08:00
|
|
|
E: serde::de::Error, {
|
|
|
|
let field_id = match self.0.get_by_right(v) {
|
|
|
|
Some(field_id) => *field_id,
|
|
|
|
None => {
|
|
|
|
let field_id = self.0.len() as FieldId;
|
|
|
|
self.0.insert(field_id, v.to_string());
|
|
|
|
field_id
|
|
|
|
}
|
|
|
|
};
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
Ok(field_id)
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn expecting(&self, _formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
todo!()
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
/// Stateless serde seed used in this file for map values: it deserializes
/// whatever it is given into a `serde_json::Value`.
struct ValueDeserializer;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
impl<'de> DeserializeSeed<'de> for ValueDeserializer {
|
|
|
|
type Value = serde_json::Value;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
2021-08-31 17:44:15 +08:00
|
|
|
where
|
2021-10-21 03:26:52 +08:00
|
|
|
D: serde::Deserializer<'de> {
|
|
|
|
serde_json::Value::deserialize(deserializer)
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
pub struct DocumentVisitor<'a, W> {
|
|
|
|
pub inner: &'a mut ByteCounter<W>,
|
|
|
|
pub index: &'a mut DocumentsBatchIndex,
|
|
|
|
pub obkv_buffer: &'a mut Vec<u8>,
|
|
|
|
pub value_buffer: &'a mut Vec<u8>,
|
|
|
|
pub values: &'a mut BTreeMap<FieldId, Value>,
|
|
|
|
pub count: &'a mut usize,
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
|
|
|
|
/// This Visitor value is nothing, since it write the value to a file.
|
|
|
|
type Value = ();
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
|
2021-08-31 17:44:15 +08:00
|
|
|
where
|
2021-10-21 03:26:52 +08:00
|
|
|
A: SeqAccess<'de>,
|
2021-08-31 17:44:15 +08:00
|
|
|
{
|
2021-10-21 03:26:52 +08:00
|
|
|
while let Some(_) = seq.next_element_seed(&mut *self)? { }
|
2021-08-31 17:44:15 +08:00
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
|
2021-08-31 17:44:15 +08:00
|
|
|
where
|
2021-10-21 03:26:52 +08:00
|
|
|
A: MapAccess<'de>,
|
2021-08-31 17:44:15 +08:00
|
|
|
{
|
2021-10-21 03:26:52 +08:00
|
|
|
while let Some((key, value)) = map.next_entry_seed(FieldIdResolver(&mut *self.index), ValueDeserializer).unwrap() {
|
|
|
|
self.values.insert(key, value);
|
|
|
|
}
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
self.obkv_buffer.clear();
|
|
|
|
let mut obkv = obkv::KvWriter::new(Cursor::new(&mut *self.obkv_buffer));
|
|
|
|
for (key, value) in self.values.iter() {
|
|
|
|
self.value_buffer.clear();
|
|
|
|
// This is guaranteed to work
|
|
|
|
serde_json::to_writer(Cursor::new(&mut *self.value_buffer), value).unwrap();
|
|
|
|
obkv.insert(*key, &self.value_buffer).unwrap();
|
|
|
|
}
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
let reader = obkv.into_inner().unwrap().into_inner();
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
self.inner.write_u32::<byteorder::BigEndian>(reader.len() as u32).unwrap();
|
|
|
|
self.inner.write_all(reader).unwrap();
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
*self.count += 1;
|
2021-08-31 17:44:15 +08:00
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
Ok(())
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
write!(f, "a documents, or a sequence of documents.")
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-21 03:26:52 +08:00
|
|
|
impl<'a, 'de, W> DeserializeSeed<'de> for &mut DocumentVisitor<'a, W>
|
|
|
|
where W: Write,
|
|
|
|
{
|
|
|
|
type Value = ();
|
|
|
|
|
|
|
|
fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
|
|
|
where
|
|
|
|
D: serde::Deserializer<'de> {
|
|
|
|
deserializer.deserialize_map(self)
|
2021-08-31 17:44:15 +08:00
|
|
|
}
|
|
|
|
}
|