From 04596f36169fb427b4ac8b2279a7ea6b61c7c134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 10 Sep 2024 18:01:17 +0100 Subject: [PATCH] Move the TopLevelMap into a dedicated module --- .../update/new/indexer/document_operation.rs | 37 +++---------------- milli/src/update/new/indexer/mod.rs | 3 +- milli/src/update/new/indexer/top_level_map.rs | 30 +++++++++++++++ 3 files changed, 38 insertions(+), 32 deletions(-) create mode 100644 milli/src/update/new/indexer/top_level_map.rs diff --git a/milli/src/update/new/indexer/document_operation.rs b/milli/src/update/new/indexer/document_operation.rs index 799079b0a..0521d43f9 100644 --- a/milli/src/update/new/indexer/document_operation.rs +++ b/milli/src/update/new/indexer/document_operation.rs @@ -1,16 +1,19 @@ use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; +use std::fmt; use std::sync::Arc; use heed::types::Bytes; use heed::RoTxn; use memmap2::Mmap; use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use serde_json::from_str; use IndexDocumentsMethod as Idm; use super::super::document_change::DocumentChange; use super::super::items_pool::ItemsPool; -use super::DocumentChanges; +use super::top_level_map::{CowStr, TopLevelMap}; +use super::{top_level_map, DocumentChanges}; use crate::documents::PrimaryKey; use crate::update::new::{Deletion, Insertion, KvReaderFieldId, KvWriterFieldId, Update}; use crate::update::{AvailableIds, IndexDocumentsMethod}; @@ -395,36 +398,8 @@ impl MergeChanges for MergeDocumentForUpdates { } } -use std::borrow::Borrow; - -use serde::Deserialize; -use serde_json::from_str; -use serde_json::value::RawValue; - -#[derive(Deserialize)] -pub struct TopLevelMap<'p>(#[serde(borrow)] BTreeMap, &'p RawValue>); - -#[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] -pub struct CowStr<'p>(#[serde(borrow)] Cow<'p, str>); - -impl CowStr<'_> { - fn to_string(&self) -> String { - self.0.to_string() - } -} - -impl AsRef for CowStr<'_> { - fn 
as_ref(&self) -> &str { - self.0.as_ref() - } -} - -impl<'doc> Borrow for CowStr<'doc> { - fn borrow(&self) -> &str { - self.0.borrow() - } -} - +/// Returns the document ID based on the primary key and +/// searches for it recursively in zero-copy-deserialized documents. fn get_docid<'p>( map: &TopLevelMap<'p>, primary_key: &[&str], diff --git a/milli/src/update/new/indexer/mod.rs b/milli/src/update/new/indexer/mod.rs index 5e3104de8..4d7e2aa47 100644 --- a/milli/src/update/new/indexer/mod.rs +++ b/milli/src/update/new/indexer/mod.rs @@ -26,6 +26,7 @@ use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError}; mod document_deletion; mod document_operation; mod partial_dump; +mod top_level_map; mod update_by_function; pub trait DocumentChanges<'p> { @@ -121,7 +122,7 @@ where &extractor_sender, )?; } - + { let span = tracing::trace_span!(target: "indexing::documents::extract", "exact_word_docids"); let _entered = span.enter(); diff --git a/milli/src/update/new/indexer/top_level_map.rs b/milli/src/update/new/indexer/top_level_map.rs new file mode 100644 index 000000000..9e1481b5e --- /dev/null +++ b/milli/src/update/new/indexer/top_level_map.rs @@ -0,0 +1,30 @@ +use std::borrow::{Borrow, Cow}; +use std::collections::BTreeMap; +use std::fmt; + +use serde::Deserialize; +use serde_json::value::RawValue; + +#[derive(Deserialize)] +pub struct TopLevelMap<'p>(#[serde(borrow)] pub BTreeMap, &'p RawValue>); + +#[derive(Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct CowStr<'p>(#[serde(borrow)] pub Cow<'p, str>); + +impl fmt::Display for CowStr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +impl AsRef for CowStr<'_> { + fn as_ref(&self) -> &str { + self.0.as_ref() + } +} + +impl<'doc> Borrow for CowStr<'doc> { + fn borrow(&self) -> &str { + self.0.borrow() + } +}