Remove TopLevelMap

This commit is contained in:
Louis Dureuil 2024-11-21 16:56:46 +01:00
parent 221e547e86
commit dcc3caef0d
No known key found for this signature in database
3 changed files with 10 additions and 72 deletions

View File

@ -6,7 +6,6 @@ use std::marker::PhantomData;
use bumpalo::Bump; use bumpalo::Bump;
use memmap2::Mmap; use memmap2::Mmap;
use milli::documents::Error; use milli::documents::Error;
use milli::update::new::TopLevelMap;
use milli::Object; use milli::Object;
use raw_collections::RawMap; use raw_collections::RawMap;
use serde::de::{SeqAccess, Visitor}; use serde::de::{SeqAccess, Visitor};
@ -212,7 +211,7 @@ pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result<u
/// Reads JSON from file and write it in NDJSON in a file checking it along the way. /// Reads JSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> { pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a TopLevelMap<'pl> that // We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level. // does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? }; let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB
@ -253,16 +252,23 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way. /// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> { pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
// We memory map to be able to deserialize into a TopLevelMap<'pl> that // We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materialize the first/top level. // does not allocate when possible and only materialize the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? }; let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output); let mut output = BufWriter::new(output);
let mut bump = Bump::with_capacity(1024 * 1024);
let mut count = 0; let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() { for result in serde_json::Deserializer::from_slice(&input).into_iter() {
bump.reset();
count += 1; count += 1;
result result
.and_then(|map: TopLevelMap| to_writer(&mut output, &map)) .and_then(|raw: &RawValue| {
// try to deserialize as a map
let map = RawMap::from_raw_value(raw, &bump)?;
to_writer(&mut output, &map)
})
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?; .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
} }

View File

@ -2,7 +2,6 @@ pub use document_change::{Deletion, DocumentChange, Insertion, Update};
pub use merger::{ pub use merger::{
merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta, merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta,
}; };
pub use top_level_map::{CowStr, TopLevelMap};
use super::del_add::DelAdd; use super::del_add::DelAdd;
use crate::FieldId; use crate::FieldId;
@ -19,7 +18,6 @@ mod parallel_iterator_ext;
mod ref_cell_ext; mod ref_cell_ext;
pub(crate) mod steps; pub(crate) mod steps;
pub(crate) mod thread_local; pub(crate) mod thread_local;
mod top_level_map;
pub mod vector_document; pub mod vector_document;
mod word_fst_builder; mod word_fst_builder;
mod words_prefix_docids; mod words_prefix_docids;

View File

@ -1,66 +0,0 @@
use std::borrow::{Borrow, Cow};
use std::collections::BTreeMap;
use std::{fmt, ops};
use serde::{Deserialize, Serialize};
use serde_json::value::RawValue;
use serde_json::{Map, Value};
/// A JSON object whose first level is materialized as a [`BTreeMap`] while every
/// value is kept as an unparsed [`RawValue`] slice.
///
/// `#[serde(borrow)]` asks serde to borrow keys and values from the `'p` input
/// instead of copying them where it can.
#[derive(Deserialize, Serialize)]
pub struct TopLevelMap<'p>(#[serde(borrow)] pub BTreeMap<CowStr<'p>, &'p RawValue>);
/// Fully parses a borrowed [`TopLevelMap`] into an owned JSON object.
impl TryFrom<&'_ TopLevelMap<'_>> for Map<String, Value> {
    type Error = serde_json::Error;

    /// Parses every raw value of `tlmap` into a [`Value`] and collects the
    /// resulting `(key, value)` pairs into a new object.
    ///
    /// # Errors
    ///
    /// Returns the first `serde_json` error hit while parsing a raw value;
    /// remaining entries are not visited.
    fn try_from(tlmap: &TopLevelMap<'_>) -> Result<Self, Self::Error> {
        tlmap
            .0
            .iter()
            .map(|(key, raw)| {
                // Parse first, then allocate the owned key only on success.
                serde_json::from_str::<Value>(raw.get()).map(|parsed| (key.to_string(), parsed))
            })
            .collect()
    }
}
/// Fully parses an owned [`TopLevelMap`] into an owned JSON object.
impl TryFrom<TopLevelMap<'_>> for Map<String, Value> {
    type Error = serde_json::Error;

    /// Delegates to the by-reference conversion; `tlmap` is dropped afterwards.
    ///
    /// # Errors
    ///
    /// Propagates whatever error the by-reference conversion returns.
    fn try_from(tlmap: TopLevelMap<'_>) -> Result<Self, Self::Error> {
        let borrowed = &tlmap;
        Self::try_from(borrowed)
    }
}
impl<'p> ops::Deref for TopLevelMap<'p> {
type Target = BTreeMap<CowStr<'p>, &'p RawValue>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl ops::DerefMut for TopLevelMap<'_> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
/// A thin wrapper around `Cow<'p, str>` carrying `#[serde(borrow)]`, so that
/// deserialization may borrow the string from the `'p` input when it can.
///
/// The comparison, ordering and hashing derives make it usable as a map key
/// (it is the key type of [`TopLevelMap`]).
#[derive(Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
pub struct CowStr<'p>(#[serde(borrow)] pub Cow<'p, str>);
impl fmt::Display for CowStr<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}
/// Cheap view of a `CowStr` as a plain string slice.
impl AsRef<str> for CowStr<'_> {
    /// Returns the wrapped text without allocating.
    fn as_ref(&self) -> &str {
        // `&Cow<str>` deref-coerces to `&str`.
        &self.0
    }
}
impl<'doc> Borrow<str> for CowStr<'doc> {
fn borrow(&self) -> &str {
self.0.borrow()
}
}