From fcfc4caf8c4512f9dce768017d95ac91804a90d2 Mon Sep 17 00:00:00 2001
From: Kerollmops
Date: Wed, 15 Jun 2022 15:36:27 +0200
Subject: [PATCH] Move the Object type in the lib.rs file and use it everywhere

---
 benchmarks/benches/utils.rs                   | 20 ++++++++----------
 cli/src/main.rs                               |  9 ++++----
 http-ui/src/main.rs                           | 16 ++++++--------
 milli/fuzz/fuzz_targets/indexing.rs           |  2 +-
 milli/src/documents/builder.rs                |  5 +++--
 milli/src/error.rs                            |  6 ++----
 milli/src/lib.rs                              | 21 ++++++++++---------
 .../extract/extract_geo_points.rs             |  1 -
 milli/src/update/index_documents/transform.rs |  2 +-
 milli/tests/search/facet_distribution.rs      |  6 +++---
 milli/tests/search/mod.rs                     |  6 +++---
 11 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs
index 091b9b0f5..630e17943 100644
--- a/benchmarks/benches/utils.rs
+++ b/benchmarks/benches/utils.rs
@@ -11,8 +11,8 @@ use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{
     IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
 };
-use milli::{Filter, Index};
-use serde_json::{Map, Value};
+use milli::{Filter, Index, Object};
+use serde_json::Value;
 
 pub struct Conf<'a> {
     /// where we are going to create our database.mmdb directory
@@ -96,12 +96,10 @@ pub fn base_setup(conf: &Conf) -> Index {
         update_method: IndexDocumentsMethod::ReplaceDocuments,
         ..Default::default()
     };
-    let mut builder =
-        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
+    let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
     let documents = documents_from(conf.dataset, conf.dataset_format);
-
-    builder.add_documents(documents).unwrap();
-
+    let (builder, user_error) = builder.add_documents(documents).unwrap();
+    user_error.unwrap();
     builder.execute().unwrap();
 
     wtxn.commit().unwrap();
@@ -156,7 +154,7 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
 fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
 
-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
         let object = result?;
         documents.append_json_object(&object)?;
     }
@@ -166,7 +164,7 @@ fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
 fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
 
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;
 
     for object in list {
         documents.append_json_object(&object)?;
@@ -221,14 +219,14 @@ impl CSVDocumentDeserializer {
 }
 
 impl Iterator for CSVDocumentDeserializer {
-    type Item = anyhow::Result<Map<String, Value>>;
+    type Item = anyhow::Result<Object>;
 
     fn next(&mut self) -> Option<Self::Item> {
         let csv_document = self.documents.next()?;
 
         match csv_document {
             Ok(csv_document) => {
-                let mut document = Map::new();
+                let mut document = Object::new();
 
                 for ((field_name, field_type), value) in
                     self.headers.iter().zip(csv_document.into_iter())
diff --git a/cli/src/main.rs b/cli/src/main.rs
index dcd0f407a..db4ca91ab 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -13,8 +13,7 @@ use milli::update::UpdateIndexingStep::{
     ComputeIdsAndMergeDocuments, IndexDocuments, MergeDataIntoFinalDatabase, RemapDocumentAddition,
 };
 use milli::update::{self, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig};
-use milli::Index;
-use serde_json::{Map, Value};
+use milli::{Index, Object};
 use structopt::StructOpt;
 
 #[cfg(target_os = "linux")]
@@ -325,7 +324,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
     let reader = BufReader::new(reader);
 
-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
         let object = result?;
         documents.append_json_object(&object)?;
     }
@@ -335,7 +334,7 @@ fn documents_from_jsonl(reader: impl Read) -> Result<Vec<u8>> {
 fn documents_from_json(reader: impl Read) -> Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
 
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;
 
     for object in list {
         documents.append_json_object(&object)?;
@@ -424,7 +423,7 @@ impl Search {
         filter: &Option<String>,
         offset: &Option<usize>,
         limit: &Option<usize>,
-    ) -> Result<Vec<Map<String, Value>>> {
+    ) -> Result<Vec<Object>> {
         let txn = index.read_txn()?;
         let mut search = index.search(&txn);
 
diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs
index 63b9ee5e0..8167076c6 100644
--- a/http-ui/src/main.rs
+++ b/http-ui/src/main.rs
@@ -26,11 +26,11 @@ use milli::update::{
 };
 use milli::{
     obkv_to_json, CompressionType, Filter as MilliFilter, FilterCondition, FormatOptions, Index,
-    MatcherBuilder, SearchResult, SortError,
+    MatcherBuilder, Object, SearchResult, SortError,
 };
 use once_cell::sync::OnceCell;
 use serde::{Deserialize, Serialize};
-use serde_json::{Map, Value};
+use serde_json::Value;
 use structopt::StructOpt;
 use tokio::fs::File as TFile;
 use tokio::io::AsyncWriteExt;
@@ -169,11 +169,7 @@ impl<'s, A: AsRef<[u8]>> Highlighter<'s, A> {
         }
     }
 
-    fn highlight_record(
-        &self,
-        object: &mut Map<String, Value>,
-        attributes_to_highlight: &HashSet<String>,
-    ) {
+    fn highlight_record(&self, object: &mut Object, attributes_to_highlight: &HashSet<String>) {
         // TODO do we need to create a string for element that are not and needs to be highlight?
         for (key, value) in object.iter_mut() {
             if attributes_to_highlight.contains(key) {
@@ -708,7 +704,7 @@ async fn main() -> anyhow::Result<()> {
     #[derive(Debug, Serialize)]
     #[serde(rename_all = "camelCase")]
     struct Answer {
-        documents: Vec<Map<String, Value>>,
+        documents: Vec<Object>,
         number_of_candidates: u64,
         facets: BTreeMap<String, BTreeMap<Value, u64>>,
     }
@@ -1036,7 +1032,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
     let reader = BufReader::new(reader);
 
-    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
         let object = result?;
         documents.append_json_object(&object)?;
     }
@@ -1046,7 +1042,7 @@ fn documents_from_jsonl(reader: impl Read) -> anyhow::Result<Vec<u8>> {
 fn documents_from_json(reader: impl Read) -> anyhow::Result<Vec<u8>> {
     let mut documents = DocumentsBatchBuilder::new(Vec::new());
 
-    let list: Vec<Map<String, Value>> = serde_json::from_reader(reader)?;
+    let list: Vec<Object> = serde_json::from_reader(reader)?;
 
     for object in list {
         documents.append_json_object(&object)?;
diff --git a/milli/fuzz/fuzz_targets/indexing.rs b/milli/fuzz/fuzz_targets/indexing.rs
index 5c3b79ed7..e4f42655e 100644
--- a/milli/fuzz/fuzz_targets/indexing.rs
+++ b/milli/fuzz/fuzz_targets/indexing.rs
@@ -21,7 +21,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
     let writer = BufWriter::new(writer);
     let mut builder = DocumentsBatchBuilder::new(writer);
 
-    let values: Vec<Map<String, Value>> = serde_json::from_reader(input)?;
+    let values: Vec<Object> = serde_json::from_reader(input)?;
     if builder.documents_count() == 0 {
         bail!("Empty payload");
     }
diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs
index 15a22090a..589e52269 100644
--- a/milli/src/documents/builder.rs
+++ b/milli/src/documents/builder.rs
@@ -1,9 +1,10 @@
 use std::io::{self, Write};
 
 use grenad::{CompressionType, WriterBuilder};
-use serde_json::{to_writer, Map, Value};
+use serde_json::{to_writer, Value};
 
 use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
+use crate::Object;
 
 /// The `DocumentsBatchBuilder` provides a way to build a documents batch in the intermediary
 /// format used by milli.
@@ -55,7 +56,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
     }
 
     /// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
-    pub fn append_json_object(&mut self, object: &Map<String, Value>) -> io::Result<()> {
+    pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
         // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
         let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(&k)).collect();
         fields_ids.sort_unstable();
diff --git a/milli/src/error.rs b/milli/src/error.rs
index d34130210..a23472951 100644
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -4,12 +4,10 @@ use std::{io, str};
 
 use heed::{Error as HeedError, MdbError};
 use rayon::ThreadPoolBuildError;
-use serde_json::{Map, Value};
+use serde_json::Value;
 use thiserror::Error;
 
-use crate::{CriterionError, DocumentId, FieldId, SortError};
-
-pub type Object = Map<String, Value>;
+use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
 
 pub fn is_reserved_keyword(keyword: &str) -> bool {
     ["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword)
diff --git a/milli/src/lib.rs b/milli/src/lib.rs
index 81cd057d5..a7be87183 100644
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -20,7 +20,7 @@ use std::hash::BuildHasherDefault;
 pub use filter_parser::{Condition, FilterCondition};
 use fxhash::{FxHasher32, FxHasher64};
 pub use grenad::CompressionType;
-use serde_json::{Map, Value};
+use serde_json::Value;
 pub use {charabia as tokenizer, heed};
 
 pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
@@ -43,20 +43,21 @@ pub use self::search::{
 
 pub type Result<T> = std::result::Result<T, error::Error>;
 
+pub type Attribute = u32;
+pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
+pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
+pub type DocumentId = u32;
 pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
 pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
+pub type FieldDistribution = BTreeMap<String, u64>;
+pub type FieldId = u16;
+pub type Object = serde_json::Map<String, Value>;
+pub type Position = u32;
+pub type RelativePosition = u16;
 pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
 pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
 pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
 pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
-pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>;
-pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
-pub type Attribute = u32;
-pub type DocumentId = u32;
-pub type FieldId = u16;
-pub type Position = u32;
-pub type RelativePosition = u16;
-pub type FieldDistribution = BTreeMap<String, u64>;
 
 /// A GeoPoint is a point in cartesian plan, called xyz_point in the code. Its metadata
 /// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
@@ -82,7 +83,7 @@ pub fn obkv_to_json(
     displayed_fields: &[FieldId],
     fields_ids_map: &FieldsIdsMap,
     obkv: obkv::KvReaderU16,
-) -> Result<Map<String, Value>> {
+) -> Result<Object> {
     displayed_fields
         .iter()
         .copied()
diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs
index 0f804b93b..46ef9ba9b 100644
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -1,6 +1,5 @@
 use std::fs::File;
 use std::io;
-use std::result::Result as StdResult;
 
 use concat_arrays::concat_arrays;
 use serde_json::Value;
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index bc7eefd33..4ece58509 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -13,7 +13,7 @@ use serde_json::{Map, Value};
 use smartstring::SmartString;
 
 use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
-use super::{validate_document_id, IndexDocumentsMethod, IndexerConfig};
+use super::{IndexDocumentsMethod, IndexerConfig};
 use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 use crate::index::db_name;
diff --git a/milli/tests/search/facet_distribution.rs b/milli/tests/search/facet_distribution.rs
index 66713de1e..8890285e7 100644
--- a/milli/tests/search/facet_distribution.rs
+++ b/milli/tests/search/facet_distribution.rs
@@ -5,8 +5,8 @@ use heed::EnvOpenOptions;
 use maplit::hashset;
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-use milli::{FacetDistribution, Index};
-use serde_json::{Deserializer, Map, Value};
+use milli::{FacetDistribution, Index, Object};
+use serde_json::Deserializer;
 
 #[test]
 fn test_facet_distribution_with_no_facet_values() {
@@ -46,7 +46,7 @@ fn test_facet_distribution_with_no_facet_values() {
     }"#,
     );
 
-    for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in Deserializer::from_reader(reader).into_iter::<Object>() {
         let object = result.unwrap();
         documents_builder.append_json_object(&object).unwrap();
     }
diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs
index 4cf117dc7..0b6ce80cc 100644
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -8,9 +8,9 @@ use heed::EnvOpenOptions;
 use maplit::{hashmap, hashset};
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-use milli::{AscDesc, Criterion, DocumentId, Index, Member};
+use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object};
 use serde::Deserialize;
-use serde_json::{Deserializer, Map, Value};
+use serde_json::Deserializer;
 use slice_group_by::GroupBy;
 
 mod distinct;
@@ -66,7 +66,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
     let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
     let reader = Cursor::new(CONTENT.as_bytes());
 
-    for result in Deserializer::from_reader(reader).into_iter::<Map<String, Value>>() {
+    for result in Deserializer::from_reader(reader).into_iter::<Object>() {
         let object = result.unwrap();
         documents_builder.append_json_object(&object).unwrap();
     }
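-- 
Reviewer note, not part of the patch (everything below the "-- " marker is
ignored by git am): with `Object` exported from milli/src/lib.rs, call sites
can build documents without spelling out `serde_json::Map<String, Value>`. A
minimal sketch of the resulting pattern, mirroring the `documents_from_json`
helpers in the hunks above; the final `into_inner` call is an assumption, as
these hunks do not show how the helpers return their `Vec<u8>`:

    use milli::documents::DocumentsBatchBuilder;
    use milli::Object;
    use serde_json::json;

    fn build_batch() -> anyhow::Result<Vec<u8>> {
        let mut documents = DocumentsBatchBuilder::new(Vec::new());

        // `Object` is the alias for `serde_json::Map<String, Value>`
        // that this patch moves into milli/src/lib.rs.
        let mut object = Object::new();
        object.insert("id".into(), json!(1));
        object.insert("title".into(), json!("Le Petit Prince"));

        // Same call as in the helpers patched above.
        documents.append_json_object(&object)?;

        // Assumed finishing call; not shown in the hunks above.
        Ok(documents.into_inner()?)
    }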