diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index aec236949..096349448 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -6,7 +6,6 @@ use std::marker::PhantomData; use bumpalo::Bump; use memmap2::Mmap; use milli::documents::Error; -use milli::update::new::TopLevelMap; use milli::Object; use raw_collections::RawMap; use serde::de::{SeqAccess, Visitor}; @@ -212,7 +211,7 @@ pub fn read_csv(input: &File, output: impl io::Write, delimiter: u8) -> Result Result { - // We memory map to be able to deserialize into a TopLevelMap<'pl> that + // We memory map to be able to deserialize into a RawMap that // does not allocate when possible and only materialize the first/top level. let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? }; let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB @@ -253,16 +252,23 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result { /// Reads NDJSON from file and write it in NDJSON in a file checking it along the way. pub fn read_ndjson(input: &File, output: impl io::Write) -> Result { - // We memory map to be able to deserialize into a TopLevelMap<'pl> that + // We memory map to be able to deserialize into a RawMap that // does not allocate when possible and only materialize the first/top level. let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? }; let mut output = BufWriter::new(output); + let mut bump = Bump::with_capacity(1024 * 1024); + let mut count = 0; for result in serde_json::Deserializer::from_slice(&input).into_iter() { + bump.reset(); count += 1; result - .and_then(|map: TopLevelMap| to_writer(&mut output, &map)) + .and_then(|raw: &RawValue| { + // try to deserialize as a map + let map = RawMap::from_raw_value(raw, &bump)?; + to_writer(&mut output, &map) + }) .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?; } diff --git a/crates/milli/src/update/new/mod.rs b/crates/milli/src/update/new/mod.rs index 140f4ccf0..87995ee55 100644 --- a/crates/milli/src/update/new/mod.rs +++ b/crates/milli/src/update/new/mod.rs @@ -2,7 +2,6 @@ pub use document_change::{Deletion, DocumentChange, Insertion, Update}; pub use merger::{ merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta, }; -pub use top_level_map::{CowStr, TopLevelMap}; use super::del_add::DelAdd; use crate::FieldId; @@ -19,7 +18,6 @@ mod parallel_iterator_ext; mod ref_cell_ext; pub(crate) mod steps; pub(crate) mod thread_local; -mod top_level_map; pub mod vector_document; mod word_fst_builder; mod words_prefix_docids; diff --git a/crates/milli/src/update/new/top_level_map.rs b/crates/milli/src/update/new/top_level_map.rs deleted file mode 100644 index aebb64bc9..000000000 --- a/crates/milli/src/update/new/top_level_map.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::borrow::{Borrow, Cow}; -use std::collections::BTreeMap; -use std::{fmt, ops}; - -use serde::{Deserialize, Serialize}; -use serde_json::value::RawValue; -use serde_json::{Map, Value}; - -#[derive(Deserialize, Serialize)] -pub struct TopLevelMap<'p>(#[serde(borrow)] pub BTreeMap, &'p RawValue>); - -impl TryFrom<&'_ TopLevelMap<'_>> for Map { - type Error = serde_json::Error; - - fn try_from(tlmap: &TopLevelMap<'_>) -> Result { - let mut object = Map::new(); - for (k, v) in &tlmap.0 { - let value = serde_json::from_str(v.get())?; - object.insert(k.to_string(), value); - } - Ok(object) - } -} - -impl TryFrom> for Map { - type Error = serde_json::Error; - - fn try_from(tlmap: TopLevelMap<'_>) -> Result { - TryFrom::try_from(&tlmap) - } -} - -impl<'p> ops::Deref for TopLevelMap<'p> { - type Target = BTreeMap, &'p RawValue>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl ops::DerefMut for TopLevelMap<'_> { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -#[derive(Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] -pub struct CowStr<'p>(#[serde(borrow)] pub Cow<'p, str>); - -impl fmt::Display for CowStr<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} - -impl AsRef for CowStr<'_> { - fn as_ref(&self) -> &str { - self.0.as_ref() - } -} - -impl<'doc> Borrow for CowStr<'doc> { - fn borrow(&self) -> &str { - self.0.borrow() - } -}