From 312c2d1d8ef12d3b7e50a39f909978f73fd0459e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 14 Jun 2021 16:46:19 +0200 Subject: [PATCH] Use the Error enum everywhere in the project --- Cargo.lock | 1 - milli/Cargo.toml | 1 - milli/src/criterion.rs | 11 +- milli/src/index.rs | 22 ++-- milli/src/lib.rs | 16 ++- milli/src/search/criteria/asc_desc.rs | 21 ++-- milli/src/search/criteria/attribute.rs | 17 ++- milli/src/search/criteria/exactness.rs | 6 +- milli/src/search/criteria/final.rs | 3 +- milli/src/search/criteria/initial.rs | 4 +- milli/src/search/criteria/mod.rs | 27 ++--- milli/src/search/criteria/proximity.rs | 19 ++-- milli/src/search/criteria/typo.rs | 13 ++- milli/src/search/criteria/words.rs | 3 +- milli/src/search/distinct/facet_distinct.rs | 14 +-- milli/src/search/distinct/mod.rs | 6 +- milli/src/search/distinct/noop_distinct.rs | 4 +- milli/src/search/facet/facet_distribution.rs | 11 +- milli/src/search/facet/filter_condition.rs | 70 +++++++----- milli/src/search/facet/mod.rs | 3 +- milli/src/search/mod.rs | 10 +- milli/src/search/query_tree.rs | 14 +-- milli/src/update/clear_documents.rs | 5 +- milli/src/update/delete_documents.rs | 14 ++- milli/src/update/facets.rs | 13 ++- .../update/index_documents/merge_function.rs | 16 +-- milli/src/update/index_documents/mod.rs | 92 +++++++++------- milli/src/update/index_documents/store.rs | 104 ++++++++++-------- milli/src/update/index_documents/transform.rs | 77 +++++++------ milli/src/update/settings.rs | 41 +++---- milli/src/update/update_builder.rs | 4 +- milli/src/update/word_prefix_docids.rs | 3 +- .../word_prefix_pair_proximity_docids.rs | 4 +- milli/src/update/words_level_positions.rs | 12 +- milli/src/update/words_prefixes_fst.rs | 4 +- 35 files changed, 385 insertions(+), 300 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c719f6f03..8e6794fb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1377,7 +1377,6 @@ dependencies = [ name = "milli" version = "0.3.1" dependencies = [ - "anyhow", "big_s", "bstr", "byteorder", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 9fe1ce3d3..ac7a977a2 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -5,7 +5,6 @@ authors = ["Kerollmops "] edition = "2018" [dependencies] -anyhow = "1.0.38" bstr = "0.2.15" byteorder = "1.4.2" chrono = { version = "0.4.19", features = ["serde"] } diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index c2205613d..931cf8588 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -1,11 +1,12 @@ use std::fmt; use std::str::FromStr; -use anyhow::{Context, bail}; use regex::Regex; use serde::{Serialize, Deserialize}; use once_cell::sync::Lazy; +use crate::error::{Error, UserError}; + static ASC_DESC_REGEX: Lazy = Lazy::new(|| { Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap() }); @@ -41,7 +42,7 @@ impl Criterion { } impl FromStr for Criterion { - type Err = anyhow::Error; + type Err = Error; fn from_str(txt: &str) -> Result { match txt { @@ -51,13 +52,15 @@ impl FromStr for Criterion { "attribute" => Ok(Criterion::Attribute), "exactness" => Ok(Criterion::Exactness), text => { - let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; + let caps = ASC_DESC_REGEX.captures(text).ok_or_else(|| { + UserError::InvalidCriterionName { name: text.to_string() } + })?; let order = caps.get(1).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str(); match order { "asc" => Ok(Criterion::Asc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())), - 
otherwise => bail!("unknown criterion name: {}", otherwise), + text => return Err(UserError::InvalidCriterionName { name: text.to_string() }.into()), } }, } diff --git a/milli/src/index.rs b/milli/src/index.rs index 4e32f673a..9ebe34a2e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2,14 +2,14 @@ use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::path::Path; -use anyhow::Context; use chrono::{DateTime, Utc}; use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use heed::types::*; use roaring::RoaringBitmap; +use crate::error::UserError; use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search}; -use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId}; +use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId, Result}; use crate::{ BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec, @@ -84,7 +84,7 @@ pub struct Index { } impl Index { - pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result { + pub fn new>(mut options: heed::EnvOpenOptions, path: P) -> Result { options.max_dbs(14); let env = options.open(path)?; @@ -173,7 +173,7 @@ impl Index { } /// Returns the number of documents indexed in the database. - pub fn number_of_documents(&self, rtxn: &RoTxn) -> anyhow::Result { + pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result { let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?; Ok(count.unwrap_or_default()) } @@ -215,7 +215,7 @@ impl Index { /// Returns the external documents ids map which associate the external ids /// with the internal ids (i.e. `u32`). - pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result> { + pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result> { let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; let hard = match hard { @@ -504,7 +504,7 @@ impl Index { } /// Returns the FST which is the words dictionary of the engine. - pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), @@ -521,7 +521,7 @@ impl Index { self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) } - pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), None => Ok(None), @@ -555,7 +555,7 @@ impl Index { } /// Returns the FST which is the words prefixes dictionnary of the engine. - pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result>> { + pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result>> { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?), @@ -577,13 +577,13 @@ impl Index { &self, rtxn: &'t RoTxn, ids: impl IntoIterator, - ) -> anyhow::Result)>> + ) -> Result)>> { let mut documents = Vec::new(); for id in ids { let kv = self.documents.get(rtxn, &BEU32::new(id))? 
- .with_context(|| format!("Could not find document {}", id))?; + .ok_or_else(|| UserError::UnknownInternalDocumentId { document_id: id })?; documents.push((id, kv)); } @@ -594,7 +594,7 @@ impl Index { pub fn all_documents<'t>( &self, rtxn: &'t RoTxn, - ) -> anyhow::Result)>>> { + ) -> Result)>>> { Ok(self .documents .iter(rtxn)? diff --git a/milli/src/lib.rs b/milli/src/lib.rs index b7401330a..6fa88ad64 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -15,12 +15,13 @@ pub mod update; use std::borrow::Cow; use std::collections::HashMap; use std::hash::BuildHasherDefault; +use std::result::Result as StdResult; -use anyhow::Context; use fxhash::{FxHasher32, FxHasher64}; use serde_json::{Map, Value}; pub use self::criterion::{Criterion, default_criteria}; +pub use self::error::Error; pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fields_ids_map::FieldsIdsMap; pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec}; @@ -30,6 +31,8 @@ pub use self::index::Index; pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords}; pub use self::tree_level::TreeLevel; +pub type Result = std::result::Result; + pub type FastMap4 = HashMap>; pub type FastMap8 = HashMap>; pub type SmallString32 = smallstr::SmallString<[u8; 32]>; @@ -44,21 +47,24 @@ pub type FieldId = u8; pub type Position = u32; pub type FieldsDistribution = HashMap; -type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result>; +type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult, E>; /// Transform a raw obkv store into a JSON Object. pub fn obkv_to_json( displayed_fields: &[FieldId], fields_ids_map: &FieldsIdsMap, obkv: obkv::KvReader, -) -> anyhow::Result> +) -> Result> { displayed_fields.iter() .copied() .flat_map(|id| obkv.get(id).map(|value| (id, value))) .map(|(id, value)| { - let name = fields_ids_map.name(id).context("unknown obkv field id")?; - let value = serde_json::from_slice(value)?; + let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId { + field_id: id, + from_db_name: "documents", + })?; + let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?; Ok((name.to_owned(), value)) }) .collect() diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index f90f3e421..c72781629 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -1,15 +1,15 @@ use std::mem::take; -use anyhow::Context; use itertools::Itertools; use log::debug; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; +use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::FacetIter; use crate::search::query_tree::Operation; -use crate::{FieldId, Index}; +use crate::{FieldId, Index, Result}; use super::{Criterion, CriterionParameters, CriterionResult}; /// Threshold on the number of candidates that will make @@ -36,7 +36,7 @@ impl<'t> AscDesc<'t> { rtxn: &'t heed::RoTxn, parent: Box, field_name: String, - ) -> anyhow::Result { + ) -> Result { Self::new(index, rtxn, parent, field_name, true) } @@ -45,7 +45,7 @@ impl<'t> AscDesc<'t> { rtxn: &'t heed::RoTxn, parent: Box, field_name: String, - ) -> anyhow::Result { + ) -> Result { Self::new(index, rtxn, parent, field_name, false) } @@ -55,11 +55,14 @@ impl<'t> AscDesc<'t> { parent: Box, field_name: String, ascending: bool, - ) -> anyhow::Result { + ) -> Result 
{ let fields_ids_map = index.fields_ids_map(rtxn)?; let field_id = fields_ids_map .id(&field_name) - .with_context(|| format!("field {:?} isn't registered", field_name))?; + .ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: field_name.clone(), + from_db_name: "asc-desc", + })?; Ok(AscDesc { index, @@ -79,7 +82,7 @@ impl<'t> AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> { #[logging_timer::time("AscDesc::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. self.allowed_candidates -= params.excluded_candidates; @@ -162,7 +165,7 @@ fn facet_ordered<'t>( field_id: FieldId, ascending: bool, candidates: RoaringBitmap, -) -> anyhow::Result> + 't>> { +) -> Result> + 't>> { if candidates.len() <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; Ok(Box::new(iter.map(Ok)) as Box>) @@ -186,7 +189,7 @@ fn iterative_facet_ordered_iter<'t>( field_id: FieldId, ascending: bool, candidates: RoaringBitmap, -) -> anyhow::Result + 't> { +) -> Result + 't> { let mut docids_values = Vec::with_capacity(candidates.len() as usize); for docid in candidates.iter() { let left = (field_id, docid, f64::MIN); diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index f825623f6..f191defe1 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -5,7 +5,7 @@ use std::mem::take; use roaring::RoaringBitmap; -use crate::{TreeLevel, search::build_dfa}; +use crate::{TreeLevel, Result, search::build_dfa}; use crate::search::criteria::Query; use crate::search::query_tree::{Operation, QueryKind}; use crate::search::{word_derivations, WordDerivationsCache}; @@ -48,7 +48,7 @@ impl<'t> Attribute<'t> { impl<'t> Criterion for Attribute<'t> { #[logging_timer::time("Attribute::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. 
if let Some((_, _, allowed_candidates)) = self.state.as_mut() { *allowed_candidates -= params.excluded_candidates; @@ -224,7 +224,12 @@ struct QueryLevelIterator<'t, 'q> { } impl<'t, 'q> QueryLevelIterator<'t, 'q> { - fn new(ctx: &'t dyn Context<'t>, queries: &'q [Query], wdcache: &mut WordDerivationsCache) -> anyhow::Result> { + fn new( + ctx: &'t dyn Context<'t>, + queries: &'q [Query], + wdcache: &mut WordDerivationsCache, + ) -> Result> + { let mut inner = Vec::with_capacity(queries.len()); for query in queries { match &query.kind { @@ -471,7 +476,7 @@ fn initialize_query_level_iterators<'t, 'q>( branches: &'q FlattenedQueryTree, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result>> { +) -> Result>> { let mut positions = BinaryHeap::with_capacity(branches.len()); for branch in branches { @@ -521,7 +526,7 @@ fn set_compute_candidates<'t>( branches: &FlattenedQueryTree, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result> +) -> Result> { let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?; let lowest_level = TreeLevel::min_value(); @@ -573,7 +578,7 @@ fn linear_compute_candidates( ctx: &dyn Context, branches: &FlattenedQueryTree, allowed_candidates: &RoaringBitmap, -) -> anyhow::Result> +) -> Result> { fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap) -> u64 { let mut min_rank = u64::max_value(); diff --git a/milli/src/search/criteria/exactness.rs b/milli/src/search/criteria/exactness.rs index 4d9e54f6e..eb44b7b8e 100644 --- a/milli/src/search/criteria/exactness.rs +++ b/milli/src/search/criteria/exactness.rs @@ -14,7 +14,7 @@ use crate::search::criteria::{ CriterionResult, resolve_query_tree, }; -use crate::TreeLevel; +use crate::{TreeLevel, Result}; pub struct Exactness<'t> { ctx: &'t dyn Context<'t>, @@ -45,7 +45,7 @@ impl<'t> Exactness<'t> { impl<'t> Criterion for Exactness<'t> { #[logging_timer::time("Exactness::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. 
if let Some(state) = self.state.as_mut() { state.difference_with(params.excluded_candidates); @@ -158,7 +158,7 @@ fn resolve_state( ctx: &dyn Context, state: State, query: &[ExactQueryPart], -) -> anyhow::Result<(RoaringBitmap, Option)> +) -> Result<(RoaringBitmap, Option)> { use State::*; match state { diff --git a/milli/src/search/criteria/final.rs b/milli/src/search/criteria/final.rs index 860362f51..645a3a5d7 100644 --- a/milli/src/search/criteria/final.rs +++ b/milli/src/search/criteria/final.rs @@ -1,6 +1,7 @@ use log::debug; use roaring::RoaringBitmap; +use crate::Result; use crate::search::query_tree::Operation; use crate::search::WordDerivationsCache; use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context}; @@ -29,7 +30,7 @@ impl<'t> Final<'t> { } #[logging_timer::time("Final::{}")] - pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result> { + pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result> { debug!("Final iteration"); let excluded_candidates = &self.returned_candidates | excluded_candidates; let mut criterion_parameters = CriterionParameters { diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs index 5d242a0eb..e6d0a17f7 100644 --- a/milli/src/search/criteria/initial.rs +++ b/milli/src/search/criteria/initial.rs @@ -1,7 +1,7 @@ use roaring::RoaringBitmap; +use crate::Result; use crate::search::query_tree::Operation; - use super::{Criterion, CriterionResult, CriterionParameters}; pub struct Initial { @@ -22,7 +22,7 @@ impl Initial { impl Criterion for Initial { #[logging_timer::time("Initial::{}")] - fn next(&mut self, _: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, _: &mut CriterionParameters) -> Result> { Ok(self.answer.take()) } } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index b14d75ddb..981fc3ef2 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -4,7 +4,7 @@ use std::borrow::Cow; use roaring::RoaringBitmap; use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; -use crate::{Index, DocumentId}; +use crate::{Index, DocumentId, Result}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use self::asc_desc::AscDesc; @@ -26,7 +26,7 @@ mod words; pub mod r#final; pub trait Criterion { - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result>; + fn next(&mut self, params: &mut CriterionParameters) -> Result>; } /// The result of a call to the parent criterion. 
@@ -78,8 +78,9 @@ pub trait Context<'c> { fn synonyms(&self, word: &str) -> heed::Result>>>; fn searchable_fields_ids(&self) -> heed::Result>; fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result>; - fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result, heed::Error>; + fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result>; } + pub struct CriteriaBuilder<'t> { rtxn: &'t heed::RoTxn<'t>, index: &'t Index, @@ -185,14 +186,14 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> { self.index.field_id_word_count_docids.get(self.rtxn, &key) } - fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result, heed::Error> { + fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result> { let key = (word, level, left, right); self.index.word_level_position_docids.get(self.rtxn, &key) } } impl<'t> CriteriaBuilder<'t> { - pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> anyhow::Result { + pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> Result { let words_fst = index.words_fst(rtxn)?; let words_prefixes_fst = index.words_prefixes_fst(rtxn)?; Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) @@ -203,7 +204,7 @@ impl<'t> CriteriaBuilder<'t> { query_tree: Option, primitive_query: Option>, filtered_candidates: Option, - ) -> anyhow::Result> + ) -> Result> { use crate::criterion::Criterion as Name; @@ -230,13 +231,13 @@ pub fn resolve_query_tree<'t>( ctx: &'t dyn Context, query_tree: &Operation, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, query_tree: &Operation, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { use Operation::{And, Phrase, Or, Query}; @@ -244,7 +245,7 @@ pub fn resolve_query_tree<'t>( And(ops) => { let mut ops = ops.iter().map(|op| { resolve_operation(ctx, op, wdcache) - }).collect::>>()?; + }).collect::>>()?; ops.sort_unstable_by_key(|cds| cds.len()); @@ -302,7 +303,7 @@ fn all_word_pair_proximity_docids, U: AsRef>( left_words: &[(T, u8)], right_words: &[(U, u8)], proximity: u8 -) -> anyhow::Result +) -> Result { let mut docids = RoaringBitmap::new(); for (left, _l_typo) in left_words { @@ -318,7 +319,7 @@ fn query_docids( ctx: &dyn Context, query: &Query, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { match &query.kind { QueryKind::Exact { word, .. 
} => { @@ -354,7 +355,7 @@ fn query_pair_proximity_docids( right: &Query, proximity: u8, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { if proximity >= 8 { let mut candidates = query_docids(ctx, left, wdcache)?; @@ -481,7 +482,7 @@ pub mod test { todo!() } - fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> Result, heed::Error> { + fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result> { todo!() } diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index 4da6fd1eb..c3c8027cb 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -5,9 +5,10 @@ use std::mem::take; use roaring::RoaringBitmap; use log::debug; -use crate::{DocumentId, Position, search::{query_tree::QueryKind}}; use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::{build_dfa, WordDerivationsCache}; +use crate::search::{query_tree::QueryKind}; +use crate::{DocumentId, Position, Result}; use super::{ Context, Criterion, @@ -55,7 +56,7 @@ impl<'t> Proximity<'t> { impl<'t> Criterion for Proximity<'t> { #[logging_timer::time("Proximity::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. if let Some((_, _, allowed_candidates)) = self.state.as_mut() { *allowed_candidates -= params.excluded_candidates; @@ -161,7 +162,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, @@ -169,7 +170,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { use Operation::{And, Phrase, Or}; @@ -227,7 +228,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator { (0..=mana.min(left_max)).map(move |m| (m, mana - m)) @@ -281,7 +282,7 @@ fn resolve_candidates<'t>( proximity: u8, cache: &mut Cache, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { // Extract the first two elements but gives the tail // that is just after the first element. 
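A note on the criteria and index hunks above: every function now returns the new crate-wide `crate::Result`, and errors are built from `UserError` / `InternalError` values with `?` or `.into()`. The error module itself (milli/src/error.rs) is not touched by this patch, so the following is only a minimal sketch of the shape those call sites assume, restricted to variants that actually appear in the diff; everything else about the definitions is an assumption.

    // Sketch only: the real definitions live in milli/src/error.rs, which this
    // patch does not show. Variant lists are limited to what the diff uses.
    pub type Result<T> = std::result::Result<T, Error>;

    #[derive(Debug)]
    pub enum Error {
        UserError(UserError),
        InternalError(InternalError),
    }

    #[derive(Debug)]
    pub enum UserError {
        InvalidCriterionName { name: String },
        UnknownInternalDocumentId { document_id: u32 }, // DocumentId in milli
    }

    #[derive(Debug)]
    pub enum InternalError {
        DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
        IndexingMergingKeys { process: &'static str },
    }

    // Conversions like these are what make `.into()` and the `?` operator
    // work at the call sites shown in the hunks above.
    impl From<UserError> for Error {
        fn from(error: UserError) -> Error { Error::UserError(error) }
    }

    impl From<InternalError> for Error {
        fn from(error: InternalError) -> Error { Error::InternalError(error) }
    }
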
@@ -324,13 +325,13 @@ fn resolve_plane_sweep_candidates( query_tree: &Operation, allowed_candidates: &RoaringBitmap, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result> +) -> Result> { /// FIXME may be buggy with query like "new new york" fn plane_sweep( groups_positions: Vec>, consecutive: bool, - ) -> anyhow::Result> + ) -> Result> { fn compute_groups_proximity( groups: &[(usize, (Position, u8, Position))], @@ -451,7 +452,7 @@ fn resolve_plane_sweep_candidates( rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, words_positions: &HashMap, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result> + ) -> Result> { use Operation::{And, Phrase, Or}; diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index d075b6bca..436f4affd 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -5,6 +5,7 @@ use roaring::RoaringBitmap; use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; use crate::search::{word_derivations, WordDerivationsCache}; +use crate::Result; use super::{ Candidates, Context, @@ -43,7 +44,7 @@ impl<'t> Typo<'t> { impl<'t> Criterion for Typo<'t> { #[logging_timer::time("Typo::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { use Candidates::{Allowed, Forbidden}; // remove excluded candidates when next is called, instead of doing it in the loop. match self.state.as_mut() { @@ -163,14 +164,14 @@ fn alterate_query_tree( mut query_tree: Operation, number_typos: u8, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn recurse( words_fst: &fst::Set>, operation: &mut Operation, number_typos: u8, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result<()> + ) -> Result<()> { use Operation::{And, Phrase, Or}; @@ -218,7 +219,7 @@ fn resolve_candidates<'t>( number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, -) -> anyhow::Result +) -> Result { fn resolve_operation<'t>( ctx: &'t dyn Context, @@ -226,7 +227,7 @@ fn resolve_candidates<'t>( number_typos: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { use Operation::{And, Phrase, Or, Query}; @@ -277,7 +278,7 @@ fn resolve_candidates<'t>( mana: u8, cache: &mut HashMap<(Operation, u8), RoaringBitmap>, wdcache: &mut WordDerivationsCache, - ) -> anyhow::Result + ) -> Result { match branches.split_first() { Some((head, [])) => { diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index 8730fa331..add90d80d 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -4,6 +4,7 @@ use log::debug; use roaring::RoaringBitmap; use crate::search::query_tree::Operation; +use crate::Result; use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree}; pub struct Words<'t> { @@ -30,7 +31,7 @@ impl<'t> Words<'t> { impl<'t> Criterion for Words<'t> { #[logging_timer::time("Words::{}")] - fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result> { + fn next(&mut self, params: &mut CriterionParameters) -> Result> { // remove excluded candidates when next is called, instead of doing it in the loop. 
if let Some(candidates) = self.candidates.as_mut() { *candidates -= params.excluded_candidates; diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index de7b28141..f86d6b8ed 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -5,7 +5,7 @@ use roaring::RoaringBitmap; use super::{Distinct, DocIter}; use crate::heed_codec::facet::*; -use crate::{DocumentId, FieldId, Index}; +use crate::{DocumentId, FieldId, Index, Result}; const FID_SIZE: usize = size_of::(); const DOCID_SIZE: usize = size_of::(); @@ -57,7 +57,7 @@ impl<'a> FacetDistinctIter<'a> { .get(self.txn, &(self.distinct, 0, key, key)) } - fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> { + fn distinct_string(&mut self, id: DocumentId) -> Result<()> { let iter = facet_string_values(id, self.distinct, self.index, self.txn)?; for item in iter { @@ -73,7 +73,7 @@ impl<'a> FacetDistinctIter<'a> { Ok(()) } - fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> { + fn distinct_number(&mut self, id: DocumentId) -> Result<()> { let iter = facet_number_values(id, self.distinct, self.index, self.txn)?; for item in iter { @@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> { /// Performs the next iteration of the facet distinct. This is a convenience method that is /// called by the Iterator::next implementation that transposes the result. It makes error /// handling easier. - fn next_inner(&mut self) -> anyhow::Result> { + fn next_inner(&mut self) -> Result> { // The first step is to remove all the excluded documents from our candidates self.candidates.difference_with(&self.excluded); @@ -129,7 +129,7 @@ fn facet_number_values<'a>( distinct: FieldId, index: &Index, txn: &'a heed::RoTxn, -) -> anyhow::Result> { +) -> Result> { let key = facet_values_prefix_key(distinct, id); let iter = index @@ -146,7 +146,7 @@ fn facet_string_values<'a>( distinct: FieldId, index: &Index, txn: &'a heed::RoTxn, -) -> anyhow::Result> { +) -> Result> { let key = facet_values_prefix_key(distinct, id); let iter = index @@ -159,7 +159,7 @@ fn facet_string_values<'a>( } impl Iterator for FacetDistinctIter<'_> { - type Item = anyhow::Result; + type Item = Result; fn next(&mut self) -> Option { self.next_inner().transpose() diff --git a/milli/src/search/distinct/mod.rs b/milli/src/search/distinct/mod.rs index 1b7c69c7a..99bc74be0 100644 --- a/milli/src/search/distinct/mod.rs +++ b/milli/src/search/distinct/mod.rs @@ -3,13 +3,13 @@ mod noop_distinct; use roaring::RoaringBitmap; -use crate::DocumentId; +use crate::{DocumentId, Result}; pub use facet_distinct::FacetDistinct; pub use noop_distinct::NoopDistinct; /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. /// It provides a way to get back the ownership to the excluded set. -pub trait DocIter: Iterator> { +pub trait DocIter: Iterator> { /// Returns ownership on the internal exluded set. fn into_excluded(self) -> RoaringBitmap; } @@ -106,7 +106,7 @@ mod test { /// Checks that all the candidates are distinct, and returns the candidates number. 
pub(crate) fn validate_distinct_candidates( - candidates: impl Iterator>, + candidates: impl Iterator>, distinct: FieldId, index: &Index, ) -> usize { diff --git a/milli/src/search/distinct/noop_distinct.rs b/milli/src/search/distinct/noop_distinct.rs index bfaafed85..812701794 100644 --- a/milli/src/search/distinct/noop_distinct.rs +++ b/milli/src/search/distinct/noop_distinct.rs @@ -1,6 +1,6 @@ use roaring::{RoaringBitmap, bitmap::IntoIter}; -use crate::DocumentId; +use crate::{DocumentId, Result}; use super::{DocIter, Distinct}; /// A distinct implementer that does not perform any distinct, @@ -13,7 +13,7 @@ pub struct NoopDistinctIter { } impl Iterator for NoopDistinctIter { - type Item = anyhow::Result; + type Item = Result; fn next(&mut self) -> Option { self.candidates.next().map(Ok) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 565f4c6dd..917314b25 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -2,15 +2,15 @@ use std::collections::{HashSet, BTreeMap}; use std::ops::Bound::Unbounded; use std::{cmp, fmt}; -use anyhow::Context; use heed::{Database, BytesDecode}; use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; +use crate::error::FieldIdMapMissingEntry; use crate::facet::FacetType; use crate::heed_codec::facet::FacetValueStringCodec; use crate::search::facet::{FacetIter, FacetRange}; -use crate::{Index, FieldId, DocumentId}; +use crate::{Index, FieldId, DocumentId, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -195,14 +195,15 @@ impl<'a> FacetDistribution<'a> { } } - pub fn execute(&self) -> anyhow::Result>> { + pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?; let mut distribution = BTreeMap::new(); for name in filterable_fields { - let fid = fields_ids_map.id(&name).with_context(|| { - format!("missing field name {:?} from the fields id map", name) + let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: name.clone(), + from_db_name: "filterable-fields", })?; let values = self.facet_values(fid)?; distribution.insert(name, values); diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index f58443b6f..98d638574 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use std::fmt::Debug; use std::ops::Bound::{self, Included, Excluded}; +use std::result::Result as StdResult; use std::str::FromStr; use either::Either; @@ -11,8 +12,9 @@ use pest::iterators::{Pair, Pairs}; use pest::Parser; use roaring::RoaringBitmap; +use crate::error::UserError; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; -use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; +use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result}; use super::FacetRange; use super::parser::Rule; @@ -60,7 +62,7 @@ impl FilterCondition { rtxn: &heed::RoTxn, index: &Index, array: I, - ) -> anyhow::Result> + ) -> Result> where I: IntoIterator>, J: IntoIterator, A: AsRef, @@ -104,11 +106,11 @@ impl FilterCondition { rtxn: &heed::RoTxn, index: &Index, expression: &str, - ) -> anyhow::Result + ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = 
index.filterable_fields_ids(rtxn)?; - let lexed = FilterParser::parse(Rule::prgm, expression)?; + let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::FilterParsing)?; FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) } @@ -116,7 +118,7 @@ impl FilterCondition { fim: &FieldsIdsMap, ff: &HashSet, expression: Pairs, - ) -> anyhow::Result + ) -> Result { PREC_CLIMBER.climb( expression, @@ -133,7 +135,7 @@ impl FilterCondition { Rule::term => Self::from_pairs(fim, ff, pair.into_inner()), _ => unreachable!(), }, - |lhs: anyhow::Result, op: Pair, rhs: anyhow::Result| { + |lhs: Result, op: Pair, rhs: Result| { match op.as_rule() { Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))), Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))), @@ -158,16 +160,17 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); - let lvalue = lresult?; - let rvalue = rresult?; + let lvalue = lresult.map_err(UserError::FilterParsing)?; + let rvalue = rresult.map_err(UserError::FilterParsing)?; Ok(Operator(fid, Between(lvalue, rvalue))) } @@ -176,10 +179,11 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -192,60 +196,68 @@ impl FilterCondition { fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, GreaterThan(result?))) + Ok(Operator(fid, GreaterThan(value))) } fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, GreaterThanOrEqual(result?))) + Ok(Operator(fid, GreaterThanOrEqual(value))) } fn lower_than( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - 
Ok(Operator(fid, LowerThan(result?))) + Ok(Operator(fid, LowerThan(value))) } fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, item: Pair, - ) -> anyhow::Result + ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; + let fid = field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::FilterParsing)?; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); + let value = result.map_err(UserError::FilterParsing)?; - Ok(Operator(fid, LowerThanOrEqual(result?))) + Ok(Operator(fid, LowerThanOrEqual(value))) } } @@ -260,7 +272,7 @@ impl FilterCondition { left: Bound, right: Bound, output: &mut RoaringBitmap, - ) -> anyhow::Result<()> + ) -> Result<()> { match (left, right) { // If the request is an exact value we must go directly to the deepest level. @@ -332,7 +344,7 @@ impl FilterCondition { strings_db: heed::Database, field_id: FieldId, operator: &Operator, - ) -> anyhow::Result + ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the @@ -390,7 +402,7 @@ impl FilterCondition { &self, rtxn: &heed::RoTxn, index: &Index, - ) -> anyhow::Result + ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -422,7 +434,7 @@ fn field_id( fields_ids_map: &FieldsIdsMap, filterable_fields: &HashSet, items: &mut Pairs, -) -> Result> +) -> StdResult> { // lexing ensures that we at least have a key let key = items.next().unwrap(); @@ -463,7 +475,7 @@ fn field_id( /// the original string that we tried to parse. /// /// Returns the parsing error associated with the span if the conversion fails. -fn pest_parse(pair: Pair) -> (Result>, String) +fn pest_parse(pair: Pair) -> (StdResult>, String) where T: FromStr, T::Err: ToString, { diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index a5e02fc9f..a1a03dba3 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -9,8 +9,9 @@ use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::{Index, FieldId}; -pub use self::filter_condition::{FilterCondition, Operator}; pub use self::facet_distribution::FacetDistribution; +pub use self::filter_condition::{FilterCondition, Operator}; +pub(crate) use self::parser::Rule as ParserRule; mod filter_condition; mod facet_distribution; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 872ebfca6..f8c7b5d9b 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::collections::hash_map::{Entry, HashMap}; use std::fmt; use std::mem::take; +use std::result::Result as StdResult; use std::str::Utf8Error; use std::time::Instant; @@ -14,10 +15,11 @@ use roaring::bitmap::RoaringBitmap; use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use crate::search::criteria::r#final::{Final, FinalResult}; -use crate::{Index, DocumentId}; +use crate::{Index, DocumentId, Result}; pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator}; pub use self::matching_words::MatchingWords; +pub(crate) use self::facet::ParserRule; use self::query_tree::QueryTreeBuilder; // Building these factories is not free. 
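The filter and search hunks above follow one consistent rule: leaf helpers that produce a foreign error (pest's parse error, `Utf8Error`) keep returning plain `std::result::Result`, imported as `StdResult` to avoid clashing with the new crate alias, and the conversion into the crate `Error` happens exactly once at the boundary (e.g. `.map_err(UserError::FilterParsing)`). Below is a small self-contained sketch of that split; `parse_filter_number` and `UserError::InvalidNumber` are invented for illustration and are not milli names.

    // Illustration of the Result / StdResult split used throughout the patch.
    use std::num::ParseFloatError;
    use std::result::Result as StdResult;

    pub type Result<T> = StdResult<T, Error>;

    #[derive(Debug)]
    pub enum Error { UserError(UserError) }

    #[derive(Debug)]
    pub enum UserError { InvalidNumber(ParseFloatError) }

    impl From<UserError> for Error {
        fn from(error: UserError) -> Error { Error::UserError(error) }
    }

    // Leaf helper: hands the foreign error back untouched.
    fn parse_number(text: &str) -> StdResult<f64, ParseFloatError> {
        text.parse::<f64>()
    }

    // Boundary function: wraps the foreign error into the crate error once,
    // the same way `.map_err(UserError::FilterParsing)` does in the diff.
    fn parse_filter_number(text: &str) -> Result<f64> {
        parse_number(text).map_err(|e| UserError::InvalidNumber(e).into())
    }

    fn main() {
        assert!(parse_filter_number("22.5").is_ok());
        assert!(parse_filter_number("twenty-two").is_err());
    }
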
@@ -93,7 +95,7 @@ impl<'a> Search<'a> { self } - pub fn execute(&self) -> anyhow::Result { + pub fn execute(&self) -> Result { // We create the query tree by spliting the query into tokens. let before = Instant::now(); let (query_tree, primitive_query) = match self.query.as_ref() { @@ -152,7 +154,7 @@ impl<'a> Search<'a> { mut distinct: D, matching_words: MatchingWords, mut criteria: Final, - ) -> anyhow::Result + ) -> Result { let mut offset = self.offset; let mut initial_candidates = RoaringBitmap::new(); @@ -225,7 +227,7 @@ pub fn word_derivations<'c>( max_typo: u8, fst: &fst::Set>, cache: &'c mut WordDerivationsCache, -) -> Result<&'c [(String, u8)], Utf8Error> { +) -> StdResult<&'c [(String, u8)], Utf8Error> { match cache.entry((word.to_string(), is_prefix, max_typo)) { Entry::Occupied(entry) => Ok(entry.into_mut()), Entry::Vacant(entry) => { diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 3c3420db4..c371b07d4 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -7,7 +7,7 @@ use meilisearch_tokenizer::TokenKind; use roaring::RoaringBitmap; use slice_group_by::GroupBy; -use crate::Index; +use crate::{Index, Result}; type IsOptionalWord = bool; type IsPrefix = bool; @@ -219,7 +219,7 @@ impl<'a> QueryTreeBuilder<'a> { /// - if `authorize_typos` is set to `false` the query tree will be generated /// forcing all query words to match documents without any typo /// (the criterion `typo` will be ignored) - pub fn build(&self, query: TokenStream) -> anyhow::Result> { + pub fn build(&self, query: TokenStream) -> Result> { let stop_words = self.index.stop_words(self.rtxn)?; let primitive_query = create_primitive_query(query, stop_words, self.words_limit); if !primitive_query.is_empty() { @@ -291,14 +291,14 @@ fn create_query_tree( optional_words: bool, authorize_typos: bool, query: &[PrimitiveQueryPart], -) -> anyhow::Result +) -> Result { /// Matches on the `PrimitiveQueryPart` and create an operation from it. fn resolve_primitive_part( ctx: &impl Context, authorize_typos: bool, part: PrimitiveQueryPart, - ) -> anyhow::Result + ) -> Result { match part { // 1. 
try to split word in 2 @@ -325,7 +325,7 @@ fn create_query_tree( ctx: &impl Context, authorize_typos: bool, query: &[PrimitiveQueryPart], - ) -> anyhow::Result + ) -> Result { const MAX_NGRAM: usize = 3; let mut op_children = Vec::new(); @@ -379,7 +379,7 @@ fn create_query_tree( ctx: &impl Context, authorize_typos: bool, query: PrimitiveQuery, - ) -> anyhow::Result + ) -> Result { let number_phrases = query.iter().filter(|p| p.is_phrase()).count(); let mut operation_children = Vec::new(); @@ -532,7 +532,7 @@ mod test { authorize_typos: bool, words_limit: Option, query: TokenStream, - ) -> anyhow::Result> + ) -> Result> { let primitive_query = create_primitive_query(query, None, words_limit); if !primitive_query.is_empty() { diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index f4c13e8f8..6e26bf027 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -1,6 +1,7 @@ use chrono::Utc; use roaring::RoaringBitmap; -use crate::{ExternalDocumentsIds, Index, FieldsDistribution}; + +use crate::{ExternalDocumentsIds, Index, FieldsDistribution, Result}; pub struct ClearDocuments<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -18,7 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { ClearDocuments { wtxn, index, _update_id: update_id } } - pub fn execute(self) -> anyhow::Result { + pub fn execute(self) -> Result { self.index.set_updated_at(self.wtxn, &Utc::now())?; let Index { env: _env, diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index c4cf132bb..6792d6278 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,15 +1,15 @@ use std::collections::HashMap; use std::collections::hash_map::Entry; -use anyhow::{anyhow, Context}; use chrono::Utc; use fst::IntoStreamer; use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; use serde_json::Value; +use crate::error::{InternalError, UserError}; use crate::heed_codec::CboRoaringBitmapCodec; -use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds}; +use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result}; use super::ClearDocuments; pub struct DeleteDocuments<'t, 'u, 'i> { @@ -25,7 +25,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, update_id: u64, - ) -> anyhow::Result> + ) -> Result> { let external_documents_ids = index .external_documents_ids(wtxn)? @@ -54,7 +54,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Some(docid) } - pub fn execute(self) -> anyhow::Result { + pub fn execute(self) -> Result { self.index.set_updated_at(self.wtxn, &Utc::now())?; // We retrieve the current documents ids that are in the database. 
let mut documents_ids = self.index.documents_ids(self.wtxn)?; @@ -77,7 +77,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { } let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - let primary_key = self.index.primary_key(self.wtxn)?.context("missing primary key")?; + let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| { + InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") } + })?; let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); let Index { @@ -119,7 +121,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let external_id = match serde_json::from_slice(content).unwrap() { Value::String(string) => SmallString32::from(string.as_str()), Value::Number(number) => SmallString32::from(number.to_string()), - _ => return Err(anyhow!("documents ids must be either strings or numbers")), + document_id => return Err(UserError::InvalidDocumentId { document_id }.into()), }; external_ids.push(external_id); } diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs index f0eab6023..757cbe810 100644 --- a/milli/src/update/facets.rs +++ b/milli/src/update/facets.rs @@ -9,11 +9,12 @@ use heed::{BytesEncode, Error}; use log::debug; use roaring::RoaringBitmap; +use crate::error::InternalError; use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::facet::FacetLevelValueF64Codec; -use crate::Index; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; +use crate::{Index, Result}; pub struct Facets<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -55,7 +56,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { self.index.set_updated_at(self.wtxn, &Utc::now())?; // We get the faceted fields to be able to create the facet levels. let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; @@ -102,7 +103,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> { self.wtxn, *self.index.facet_id_f64_docids.as_polymorph(), content, - |_, _| anyhow::bail!("invalid facet number level merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }), WriteMethod::GetMergePut, )?; } @@ -132,7 +133,7 @@ fn compute_facet_number_levels<'t>( level_group_size: NonZeroUsize, min_level_size: NonZeroUsize, field_id: u8, -) -> anyhow::Result> +) -> Result> { let first_level_size = db .remap_key_type::() @@ -195,7 +196,7 @@ fn compute_faceted_documents_ids( rtxn: &heed::RoTxn, db: heed::Database, field_id: u8, -) -> anyhow::Result +) -> Result { let mut documents_ids = RoaringBitmap::new(); @@ -214,7 +215,7 @@ fn write_number_entry( left: f64, right: f64, ids: &RoaringBitmap, -) -> anyhow::Result<()> +) -> Result<()> { let key = (field_id, level, left, right); let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?; diff --git a/milli/src/update/index_documents/merge_function.rs b/milli/src/update/index_documents/merge_function.rs index 8c93773ce..3d9ffda6a 100644 --- a/milli/src/update/index_documents/merge_function.rs +++ b/milli/src/update/index_documents/merge_function.rs @@ -1,17 +1,19 @@ use std::borrow::Cow; +use std::result::Result as StdResult; use fst::IntoStreamer; use roaring::RoaringBitmap; use crate::heed_codec::CboRoaringBitmapCodec; +use crate::Result; /// Only the last value associated with an id is kept. 
-pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result> { Ok(obkvs.last().unwrap().clone().into_owned()) } /// Merge all the obks in the order we see them. -pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result> { let mut iter = obkvs.iter(); let first = iter.next().map(|b| b.clone().into_owned()).unwrap(); Ok(iter.fold(first, |acc, current| { @@ -24,8 +26,8 @@ pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result> } // Union of multiple FSTs -pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { - let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; +pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { + let fsts = values.iter().map(fst::Set::new).collect::, _>>()?; let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect(); let op = op_builder.r#union(); @@ -34,7 +36,7 @@ pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { Ok(build.into_inner().unwrap()) } -pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { Ok(values.first().unwrap().to_vec()) } @@ -54,7 +56,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu writer.finish().unwrap(); } -pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { let (head, tail) = values.split_first().unwrap(); let mut head = RoaringBitmap::deserialize_from(&head[..])?; @@ -68,7 +70,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result Ok(vec) } -pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result> { +pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result> { let (head, tail) = values.split_first().unwrap(); let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 1d31cba85..51c8b948a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -3,11 +3,11 @@ use std::collections::HashSet; use std::fs::File; use std::io::{self, Seek, SeekFrom, BufReader, BufRead}; use std::num::{NonZeroU32, NonZeroUsize}; +use std::result::Result as StdResult; use std::str; use std::sync::mpsc::sync_channel; use std::time::Instant; -use anyhow::Context; use bstr::ByteSlice as _; use chrono::Utc; use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType}; @@ -18,7 +18,8 @@ use rayon::prelude::*; use rayon::ThreadPool; use serde::{Serialize, Deserialize}; -use crate::index::Index; +use crate::error::{Error, InternalError}; +use crate::{Index, Result}; use crate::update::{ Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep, WordPrefixPairProximityDocids, @@ -56,14 +57,14 @@ pub fn create_writer(typ: CompressionType, level: Option, file: File) -> io builder.build(file) } -pub fn create_sorter( - merge: MergeFn, +pub fn create_sorter( + merge: MergeFn, chunk_compression_type: CompressionType, chunk_compression_level: Option, chunk_fusing_shrink_size: Option, max_nb_chunks: Option, max_memory: Option, -) -> Sorter +) -> Sorter> { let mut builder = Sorter::builder(merge); if let 
Some(shrink_size) = chunk_fusing_shrink_size { @@ -82,7 +83,7 @@ pub fn create_sorter( builder.build() } -pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> anyhow::Result> { +pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> Result> { let mut file = writer.into_inner()?; file.seek(SeekFrom::Start(0))?; let file = if let Some(shrink_size) = shrink_size { @@ -93,19 +94,25 @@ pub fn writer_into_reader(writer: Writer, shrink_size: Option) -> any Reader::new(file).map_err(Into::into) } -pub fn merge_readers(sources: Vec>, merge: MergeFn) -> Merger { +pub fn merge_readers( + sources: Vec>, + merge: MergeFn, +) -> Merger> +{ let mut builder = Merger::builder(merge); builder.extend(sources); builder.build() } -pub fn merge_into_lmdb_database( +pub fn merge_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, sources: Vec>, - merge: MergeFn, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { debug!("Merging {} MTBL stores...", sources.len()); let before = Instant::now(); @@ -123,13 +130,15 @@ pub fn merge_into_lmdb_database( Ok(()) } -pub fn write_into_lmdb_database( +pub fn write_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, mut reader: Reader, - merge: MergeFn, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { debug!("Writing MTBL stores..."); let before = Instant::now(); @@ -138,9 +147,7 @@ pub fn write_into_lmdb_database( WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = reader.next()? { - out_iter.append(k, v).with_context(|| { - format!("writing {:?} into LMDB", k.as_bstr()) - })?; + out_iter.append(k, v)?; } }, WriteMethod::GetMergePut => { @@ -165,13 +172,16 @@ pub fn write_into_lmdb_database( Ok(()) } -pub fn sorter_into_lmdb_database( +pub fn sorter_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, - sorter: Sorter, - merge: MergeFn, + sorter: Sorter>, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, + Error: From> { debug!("Writing MTBL sorter..."); let before = Instant::now(); @@ -188,21 +198,21 @@ pub fn sorter_into_lmdb_database( Ok(()) } -fn merger_iter_into_lmdb_database( +fn merger_iter_into_lmdb_database( wtxn: &mut heed::RwTxn, database: heed::PolyDatabase, - mut sorter: MergerIter, - merge: MergeFn, + mut sorter: MergerIter>, + merge: MergeFn, method: WriteMethod, -) -> anyhow::Result<()> +) -> Result<()> +where + Error: From, { match method { WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = sorter.next()? { - out_iter.append(k, v).with_context(|| { - format!("writing {:?} into LMDB", k.as_bstr()) - })?; + out_iter.append(k, v)?; } }, WriteMethod::GetMergePut => { @@ -211,7 +221,10 @@ fn merger_iter_into_lmdb_database( match iter.next().transpose()? { Some((key, old_val)) if key == k => { let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)]; - let val = merge(k, &vals).expect("merge failed"); + let val = merge(k, &vals).map_err(|_| { + // TODO just wrap this error? 
+ InternalError::IndexingMergingKeys { process: "get-put-merge" } + })?; iter.put_current(k, &val)?; }, _ => { @@ -318,7 +331,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { self.autogenerate_docids = false; } - pub fn execute(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn execute(self, reader: R, progress_callback: F) -> Result where R: io::Read, F: Fn(UpdateIndexingStep, u64) + Sync, @@ -365,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { Ok(DocumentAdditionResult { nb_documents }) } - pub fn execute_raw(self, output: TransformOutput, progress_callback: F) -> anyhow::Result<()> + pub fn execute_raw(self, output: TransformOutput, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync { @@ -403,15 +416,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("{} documents actually deleted", deleted_documents_count); } - let mmap; - let bytes = if documents_count == 0 { - &[][..] - } else { - mmap = unsafe { Mmap::map(&documents_file).context("mmaping the transform documents file")? }; - &mmap - }; + if documents_count == 0 { + return Ok(()); + } - let documents = grenad::Reader::new(bytes).unwrap(); + let bytes = unsafe { Mmap::map(&documents_file)? }; + let documents = grenad::Reader::new(bytes.as_bytes()).unwrap(); // The enum which indicates the type of the readers // merges that are potentially done on different threads. @@ -477,7 +487,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { &progress_callback, ) }) - .collect::, _>>()?; + .collect::, _>>()?; let mut main_readers = Vec::with_capacity(readers.len()); let mut word_docids_readers = Vec::with_capacity(readers.len()); @@ -535,7 +545,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("Merging the main, word docids and words pairs proximity docids in parallel..."); rayon::spawn(move || { vec![ - (DatabaseType::Main, main_readers, fst_merge as MergeFn), + (DatabaseType::Main, main_readers, fst_merge as MergeFn<_>), (DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge), ( DatabaseType::FacetLevel0NumbersDocids, @@ -570,7 +580,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { facet_field_strings_docids_readers, field_id_docid_facet_numbers_readers, field_id_docid_facet_strings_readers, - )) as anyhow::Result<_> + )) as Result<_> })?; let ( diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index 4662cd609..e5e55682e 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -6,7 +6,6 @@ use std::iter::FromIterator; use std::time::Instant; use std::{cmp, iter}; -use anyhow::Context; use bstr::ByteSlice as _; use fst::Set; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; @@ -19,11 +18,12 @@ use roaring::RoaringBitmap; use serde_json::Value; use tempfile::tempfile; +use crate::error::{Error, InternalError, SerializationError}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::update::UpdateIndexingStep; -use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId}; +use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, Result}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, 
cbo_roaring_bitmap_merge}; @@ -66,15 +66,15 @@ pub struct Store<'s, A> { chunk_compression_level: Option, chunk_fusing_shrink_size: Option, // MTBL sorters - main_sorter: Sorter, - word_docids_sorter: Sorter, - words_pairs_proximities_docids_sorter: Sorter, - word_level_position_docids_sorter: Sorter, - field_id_word_count_docids_sorter: Sorter, - facet_field_numbers_docids_sorter: Sorter, - facet_field_strings_docids_sorter: Sorter, - field_id_docid_facet_numbers_sorter: Sorter, - field_id_docid_facet_strings_sorter: Sorter, + main_sorter: Sorter>, + word_docids_sorter: Sorter>, + words_pairs_proximities_docids_sorter: Sorter>, + word_level_position_docids_sorter: Sorter>, + field_id_word_count_docids_sorter: Sorter>, + facet_field_numbers_docids_sorter: Sorter>, + facet_field_strings_docids_sorter: Sorter>, + field_id_docid_facet_numbers_sorter: Sorter>, + field_id_docid_facet_strings_sorter: Sorter>, // MTBL writers docid_word_positions_writer: Writer, documents_writer: Writer, @@ -93,7 +93,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { chunk_compression_level: Option, chunk_fusing_shrink_size: Option, stop_words: Option<&'s Set>, - ) -> anyhow::Result + ) -> Result { // We divide the max memory by the number of sorter the Store have. let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5)); @@ -221,7 +221,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { } // Save the documents ids under the position and word we have seen it. - fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> anyhow::Result<()> { + fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> Result<()> { // if get_refresh finds the element it is assured to be at the end of the linked hash map. match self.word_docids.get_refresh(word.as_bytes()) { Some(old) => { old.insert(id); }, @@ -246,7 +246,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { field_id: FieldId, value: OrderedFloat, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { let sorter = &mut self.field_id_docid_facet_numbers_sorter; Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?; @@ -279,7 +279,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { field_id: FieldId, value: String, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { let sorter = &mut self.field_id_docid_facet_strings_sorter; Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?; @@ -311,7 +311,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { &mut self, words_pairs_proximities: impl IntoIterator, id: DocumentId, - ) -> anyhow::Result<()> + ) -> Result<()> { for ((w1, w2), prox) in words_pairs_proximities { let w1 = SmallVec32::from(w1.as_bytes()); @@ -350,7 +350,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { facet_numbers_values: &mut HashMap>, facet_strings_values: &mut HashMap>, record: &[u8], - ) -> anyhow::Result<()> + ) -> Result<()> { // We compute the list of words pairs proximities (self-join) and write it directly to disk. 
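// --- Illustrative aside (not part of this patch) ---
// A minimal sketch of the generic error pattern used by the sorter and merge
// helpers in this file: the merge callback is generic over its own error type
// `E`, and callers only require `Error: From<E>` so that `?` can lift `E` into
// the crate-wide error. The `Sketch*` names below are hypothetical stand-ins,
// not the actual milli definitions.
use std::borrow::Cow;

#[derive(Debug)]
struct SketchError(String);

type SketchMergeFn<E> = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Vec<u8>, E>;

fn merge_one<E>(
    key: &[u8],
    values: &[Cow<'_, [u8]>],
    merge: SketchMergeFn<E>,
) -> Result<Vec<u8>, SketchError>
where
    SketchError: From<E>,
{
    // `?` converts the callback's error `E` into the caller's error type.
    Ok(merge(key, values)?)
}
// --- end of aside ---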
let words_pair_proximities = compute_words_pair_proximities(&words_positions); @@ -385,10 +385,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_words_pairs_proximities( - sorter: &mut Sorter, + fn write_words_pairs_proximities( + sorter: &mut Sorter>, iter: impl IntoIterator, SmallVec32, u8), RoaringBitmap)>, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut key = Vec::new(); let mut buffer = Vec::new(); @@ -417,7 +419,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { writer: &mut Writer, id: DocumentId, words_positions: &HashMap>, - ) -> anyhow::Result<()> + ) -> Result<()> { // We prefix the words by the document id. let mut key = id.to_be_bytes().to_vec(); @@ -445,11 +447,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_word_position_docids( - writer: &mut Sorter, + fn write_word_position_docids( + writer: &mut Sorter>, document_id: DocumentId, words_positions: &HashMap>, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut key_buffer = Vec::new(); let mut data_buffer = Vec::new(); @@ -480,11 +484,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_facet_field_string_docids( - sorter: &mut Sorter, + fn write_facet_field_string_docids( + sorter: &mut Sorter>, iter: I, - ) -> anyhow::Result<()> - where I: IntoIterator + ) -> Result<()> + where + I: IntoIterator, + Error: From, { let mut key_buffer = Vec::new(); let mut data_buffer = Vec::new(); @@ -504,11 +510,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_facet_field_number_docids( - sorter: &mut Sorter, + fn write_facet_field_number_docids( + sorter: &mut Sorter>, iter: I, - ) -> anyhow::Result<()> - where I: IntoIterator), RoaringBitmap)> + ) -> Result<()> + where + I: IntoIterator), RoaringBitmap)>, + Error: From, { let mut data_buffer = Vec::new(); @@ -517,7 +525,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) .map(Cow::into_owned) - .context("could not serialize facet level value key")?; + .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?; CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); @@ -529,16 +537,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_field_id_docid_facet_number_value( - sorter: &mut Sorter, + fn write_field_id_docid_facet_number_value( + sorter: &mut Sorter>, field_id: FieldId, document_id: DocumentId, value: OrderedFloat, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) .map(Cow::into_owned) - .context("could not serialize facet level value key")?; + .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?; if lmdb_key_valid_size(&key) { sorter.insert(&key, &[])?; @@ -547,12 +557,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_field_id_docid_facet_string_value( - sorter: &mut Sorter, + fn write_field_id_docid_facet_string_value( + sorter: &mut Sorter>, field_id: FieldId, document_id: DocumentId, value: &str, - ) -> anyhow::Result<()> + ) -> Result<()> + where + Error: From, { let mut buffer = Vec::new(); @@ -565,8 +577,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(()) } - fn write_word_docids(sorter: &mut Sorter, iter: I) -> anyhow::Result<()> - where I: IntoIterator, RoaringBitmap)> + fn write_word_docids(sorter: &mut Sorter>, iter: I) -> Result<()> + where + I: IntoIterator, RoaringBitmap)>, + Error: From, { let mut key = 
Vec::new(); let mut buffer = Vec::new(); @@ -596,7 +610,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { num_threads: usize, log_every_n: Option, mut progress_callback: F, - ) -> anyhow::Result + ) -> Result where F: FnMut(UpdateIndexingStep), { debug!("{:?}: Indexing in a Store...", thread_index); @@ -625,7 +639,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { for (attr, content) in document.iter() { if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) { - let value = serde_json::from_slice(content)?; + let value = serde_json::from_slice(content).map_err(InternalError::SerdeJson)?; let (facet_numbers, facet_strings) = extract_facet_values(&value); facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers); @@ -679,7 +693,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { Ok(readers) } - fn finish(mut self) -> anyhow::Result { + fn finish(mut self) -> Result { let comp_type = self.chunk_compression_type; let comp_level = self.chunk_compression_level; let shrink_size = self.chunk_fusing_shrink_size; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 5fbd24bb1..82003eddc 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -2,17 +2,19 @@ use std::borrow::Cow; use std::fs::File; use std::io::{Read, Seek, SeekFrom}; use std::iter::Peekable; +use std::result::Result as StdResult; use std::time::Instant; -use anyhow::{anyhow, Context}; use grenad::CompressionType; use log::info; use roaring::RoaringBitmap; use serde_json::{Map, Value}; +use crate::error::{Error, UserError, InternalError}; use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; -use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; +use crate::{Index, Result}; use super::merge_function::merge_two_obkvs; use super::{create_writer, create_sorter, IndexDocumentsMethod}; @@ -53,7 +55,7 @@ fn is_primary_key(field: impl AsRef) -> bool { } impl Transform<'_, '_> { - pub fn output_from_json(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_json(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -61,7 +63,7 @@ impl Transform<'_, '_> { self.output_from_generic_json(reader, false, progress_callback) } - pub fn output_from_json_stream(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_json_stream(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -74,7 +76,7 @@ impl Transform<'_, '_> { reader: R, is_stream: bool, progress_callback: F, - ) -> anyhow::Result + ) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -88,7 +90,7 @@ impl Transform<'_, '_> { let iter = Box::new(iter) as Box>; iter.peekable() } else { - let vec: Vec<_> = serde_json::from_reader(reader)?; + let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?; let iter = vec.into_iter().map(Ok); let iter = Box::new(iter) as Box>; iter.peekable() @@ -96,9 +98,12 @@ impl Transform<'_, '_> { // We extract the primary key from the first document in // the batch if it hasn't already been defined in the index - let first = match documents.peek().map(Result::as_ref).transpose() { + let first = match 
documents.peek().map(StdResult::as_ref).transpose() { Ok(first) => first, - Err(_) => return Err(documents.next().unwrap().unwrap_err().into()), + Err(_) => { + let error = documents.next().unwrap().unwrap_err(); + return Err(UserError::SerdeJson(error).into()); + }, }; let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); @@ -145,7 +150,7 @@ impl Transform<'_, '_> { let mut documents_count = 0; for result in documents { - let document = result?; + let document = result.map_err(UserError::SerdeJson)?; if self.log_every_n.map_or(false, |len| documents_count % len == 0) { progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { @@ -158,7 +163,7 @@ impl Transform<'_, '_> { // We prepare the fields ids map with the documents keys. for (key, _value) in &document { - fields_ids_map.insert(&key).context("field id limit reached")?; + fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?; } // We retrieve the user id from the document based on the primary key name, @@ -167,11 +172,13 @@ impl Transform<'_, '_> { Some(value) => match value { Value::String(string) => Cow::Borrowed(string.as_str()), Value::Number(number) => Cow::Owned(number.to_string()), - _ => return Err(anyhow!("documents ids must be either strings or numbers")), + content => return Err(UserError::InvalidDocumentId { + document_id: content.clone(), + }.into()), }, None => { if !self.autogenerate_docids { - return Err(anyhow!("missing primary key")); + return Err(UserError::MissingPrimaryKey.into()); } let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); Cow::Borrowed(uuid) @@ -186,13 +193,15 @@ impl Transform<'_, '_> { // and this should be the document id we return the one we generated. if let Some(value) = document.get(name) { // We serialize the attribute values. - serde_json::to_writer(&mut json_buffer, value)?; + serde_json::to_writer(&mut json_buffer, value).map_err(InternalError::SerdeJson)?; writer.insert(field_id, &json_buffer)?; } // We validate the document id [a-zA-Z0-9\-_]. if field_id == primary_key_id && validate_document_id(&external_id).is_none() { - return Err(anyhow!("invalid document id: {:?}", external_id)); + return Err(UserError::InvalidDocumentId { + document_id: Value::from(external_id), + }.into()); } } @@ -217,7 +226,7 @@ impl Transform<'_, '_> { ) } - pub fn output_from_csv(self, reader: R, progress_callback: F) -> anyhow::Result + pub fn output_from_csv(self, reader: R, progress_callback: F) -> Result where R: Read, F: Fn(UpdateIndexingStep) + Sync, @@ -226,12 +235,12 @@ impl Transform<'_, '_> { let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); let mut csv = csv::Reader::from_reader(reader); - let headers = csv.headers()?; + let headers = csv.headers().map_err(UserError::Csv)?; let mut fields_ids = Vec::new(); // Generate the new fields ids based on the current fields ids and this CSV headers. for (i, header) in headers.iter().enumerate() { - let id = fields_ids_map.insert(header).context("field id limit reached)")?; + let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?; fields_ids.push((id, i)); } @@ -281,7 +290,7 @@ impl Transform<'_, '_> { let mut documents_count = 0; let mut record = csv::StringRecord::new(); - while csv.read_record(&mut record)? { + while csv.read_record(&mut record).map_err(UserError::Csv)? 
{ obkv_buffer.clear(); let mut writer = obkv::KvWriter::new(&mut obkv_buffer); @@ -298,7 +307,9 @@ impl Transform<'_, '_> { // We validate the document id [a-zA-Z0-9\-_]. match validate_document_id(&external_id) { Some(valid) => valid, - None => return Err(anyhow!("invalid document id: {:?}", external_id)), + None => return Err(UserError::InvalidDocumentId { + document_id: Value::from(external_id), + }.into()), } }, None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer), @@ -316,7 +327,7 @@ impl Transform<'_, '_> { for (field_id, field) in iter { // We serialize the attribute values as JSON strings. json_buffer.clear(); - serde_json::to_writer(&mut json_buffer, &field)?; + serde_json::to_writer(&mut json_buffer, &field).map_err(InternalError::SerdeJson)?; writer.insert(*field_id, &json_buffer)?; } @@ -344,17 +355,18 @@ impl Transform<'_, '_> { /// Generate the `TransformOutput` based on the given sorter that can be generated from any /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// id for the user side and the value must be an obkv where keys are valid fields ids. - fn output_from_sorter( + fn output_from_sorter( self, - sorter: grenad::Sorter, + sorter: grenad::Sorter>, primary_key: String, fields_ids_map: FieldsIdsMap, approximate_number_of_documents: usize, mut external_documents_ids: ExternalDocumentsIds<'_>, progress_callback: F, - ) -> anyhow::Result + ) -> Result where F: Fn(UpdateIndexingStep) + Sync, + Error: From, { let documents_ids = self.index.documents_ids(self.rtxn)?; let mut fields_distribution = self.index.fields_distribution(self.rtxn)?; @@ -362,7 +374,7 @@ impl Transform<'_, '_> { // Once we have sort and deduplicated the documents we write them into a final file. let mut final_sorter = create_sorter( - |_docid, _obkvs| Err(anyhow!("cannot merge two documents")), + |_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }), self.chunk_compression_type, self.chunk_compression_level, self.chunk_fusing_shrink_size, @@ -398,7 +410,10 @@ impl Transform<'_, '_> { IndexDocumentsMethod::UpdateDocuments => { let key = BEU32::new(docid); let base_obkv = self.index.documents.get(&self.rtxn, &key)? - .context("document not found")?; + .ok_or(InternalError::DatabaseMissingEntry { + db_name: "documents", + key: None, + })?; let update_obkv = obkv::KvReader::new(update_obkv); merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer); (docid, obkv_buffer.as_slice()) @@ -409,7 +424,7 @@ impl Transform<'_, '_> { // If this user id is new we add it to the external documents ids map // for new ids and into the list of new documents. 
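// --- Illustrative aside (not part of this patch) ---
// The call sites above rely on `validate_document_id` accepting only
// [a-zA-Z0-9-_]. A hypothetical validator with that behaviour could look like
// this; the real helper may differ in details (e.g. length limits).
fn validate_document_id_sketch(document_id: &str) -> Option<&str> {
    if !document_id.is_empty()
        && document_id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
    {
        Some(document_id)
    } else {
        None
    }
}
// --- end of aside ---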
let new_docid = available_documents_ids.next() - .context("no more available documents ids")?; + .ok_or(UserError::DocumentLimitReached)?; new_external_documents_ids_builder.insert(external_id, new_docid as u64)?; new_documents_ids.insert(new_docid); (new_docid, update_obkv) @@ -469,7 +484,7 @@ impl Transform<'_, '_> { primary_key: String, old_fields_ids_map: FieldsIdsMap, new_fields_ids_map: FieldsIdsMap, - ) -> anyhow::Result + ) -> Result { let fields_distribution = self.index.fields_distribution(self.rtxn)?; let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; @@ -529,10 +544,10 @@ fn compute_primary_key_pair( fields_ids_map: &mut FieldsIdsMap, alternative_name: Option, autogenerate_docids: bool, -) -> anyhow::Result<(FieldId, String)> { +) -> Result<(FieldId, String)> { match primary_key { Some(primary_key) => { - let id = fields_ids_map.insert(primary_key).ok_or(anyhow!("Maximum number of fields exceeded"))?; + let id = fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?; Ok((id, primary_key.to_string())) } None => { @@ -542,12 +557,12 @@ fn compute_primary_key_pair( if !autogenerate_docids { // If there is no primary key in the current document batch, we must // return an error and not automatically generate any document id. - anyhow::bail!("missing primary key") + return Err(UserError::MissingPrimaryKey.into()); } DEFAULT_PRIMARY_KEY_NAME.to_string() }, }; - let id = fields_ids_map.insert(&name).context("field id limit reached")?; + let id = fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?; Ok((id, name)) }, } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1c687e089..1756a21c9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,6 +1,6 @@ use std::collections::{BTreeSet, HashMap, HashSet}; +use std::result::Result as StdResult; -use anyhow::Context; use chrono::Utc; use grenad::CompressionType; use itertools::Itertools; @@ -9,9 +9,10 @@ use rayon::ThreadPool; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::criterion::Criterion; +use crate::error::UserError; use crate::update::index_documents::{IndexDocumentsMethod, Transform}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; -use crate::{FieldsIdsMap, Index}; +use crate::{FieldsIdsMap, Index, Result}; #[derive(Debug, Clone, PartialEq)] pub enum Setting { @@ -33,7 +34,7 @@ impl Setting { } impl Serialize for Setting { - fn serialize(&self, serializer: S) -> Result where S: Serializer { + fn serialize(&self, serializer: S) -> StdResult where S: Serializer { match self { Self::Set(value) => Some(value), // Usually not_set isn't serialized by setting skip_serializing_if field attribute @@ -43,7 +44,7 @@ impl Serialize for Setting { } impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { - fn deserialize(deserializer: D) -> Result where D: Deserializer<'de> { + fn deserialize(deserializer: D) -> StdResult where D: Deserializer<'de> { Deserialize::deserialize(deserializer).map(|x| match x { Some(x) => Self::Set(x), None => Self::Reset, // Reset is forced by sending null value @@ -165,7 +166,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync { @@ -192,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { }; // There already has been a 
document addition, the primary key should be set by now. - let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?; + let primary_key = self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?; // We remap the documents fields based on the new `FieldsIdsMap`. let output = transform.remap_index_documents( @@ -220,7 +221,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - fn update_displayed(&mut self) -> anyhow::Result { + fn update_displayed(&mut self) -> Result { match self.displayed_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -234,7 +235,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in names.iter() { fields_ids_map .insert(name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } self.index.put_displayed_fields(self.wtxn, &names)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; @@ -245,13 +246,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(true) } - fn update_distinct_field(&mut self) -> anyhow::Result { + fn update_distinct_field(&mut self) -> Result { match self.distinct_field { Setting::Set(ref attr) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; fields_ids_map .insert(attr) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; self.index.put_distinct_field(self.wtxn, &attr)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; @@ -264,7 +265,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { /// Updates the index's searchable attributes. This causes the field map to be recomputed to /// reflect the order of the searchable attributes. - fn update_searchable(&mut self) -> anyhow::Result { + fn update_searchable(&mut self) -> Result { match self.searchable_fields { Setting::Set(ref fields) => { // every time the searchable attributes are updated, we need to update the @@ -285,13 +286,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in names.iter() { new_fields_ids_map .insert(&name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } for (_, name) in old_fields_ids_map.iter() { new_fields_ids_map .insert(&name) - .context("field id limit exceeded")?; + .ok_or(UserError::AttributeLimitReached)?; } self.index.put_searchable_fields(self.wtxn, &names)?; @@ -303,7 +304,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(true) } - fn update_stop_words(&mut self) -> anyhow::Result { + fn update_stop_words(&mut self) -> Result { match self.stop_words { Setting::Set(ref stop_words) => { let current = self.index.stop_words(self.wtxn)?; @@ -325,7 +326,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn update_synonyms(&mut self) -> anyhow::Result { + fn update_synonyms(&mut self) -> Result { match self.synonyms { Setting::Set(ref synonyms) => { fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec { @@ -383,13 +384,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { } } - fn update_filterable(&mut self) -> anyhow::Result<()> { + fn update_filterable(&mut self) -> Result<()> { match self.filterable_fields { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_facets = HashSet::new(); for name in fields { - fields_ids_map.insert(name).context("field id limit exceeded")?; + fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; new_facets.insert(name.clone()); } 
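// --- Illustrative aside (not part of this patch) ---
// The settings updaters above all follow the same conversion pattern: the
// `Option` returned by `FieldsIdsMap::insert` is mapped to a typed
// `UserError::AttributeLimitReached` with `ok_or`, and `?` then turns that
// `UserError` into the crate `Error` through its `From` impl, replacing the
// former `anyhow::Context` calls. A standalone sketch of that shape, with
// hypothetical types:
#[derive(Debug)]
enum SketchUserError { AttributeLimitReached }

#[derive(Debug)]
enum SketchCrateError { User(SketchUserError) }

impl From<SketchUserError> for SketchCrateError {
    fn from(error: SketchUserError) -> Self { SketchCrateError::User(error) }
}

fn insert_field(map: &mut Vec<String>, name: &str) -> Result<usize, SketchCrateError> {
    // Pretend the map is full after 256 entries, as with a one-byte field id.
    if map.len() >= 256 {
        return Err(SketchUserError::AttributeLimitReached.into());
    }
    map.push(name.to_string());
    Ok(map.len() - 1)
}
// --- end of aside ---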
self.index.put_filterable_fields(self.wtxn, &new_facets)?; @@ -401,7 +402,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - fn update_criteria(&mut self) -> anyhow::Result<()> { + fn update_criteria(&mut self) -> Result<()> { match self.criteria { Setting::Set(ref fields) => { let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; @@ -409,7 +410,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for name in fields { let criterion: Criterion = name.parse()?; if let Some(name) = criterion.field_name() { - fields_ids_map.insert(name).context("field id limit exceeded")?; + fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; } new_criteria.push(criterion); } @@ -422,7 +423,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } - pub fn execute(mut self, progress_callback: F) -> anyhow::Result<()> + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep, u64) + Sync { diff --git a/milli/src/update/update_builder.rs b/milli/src/update/update_builder.rs index 8d6eb034d..1d0e776b1 100644 --- a/milli/src/update/update_builder.rs +++ b/milli/src/update/update_builder.rs @@ -1,7 +1,7 @@ use grenad::CompressionType; use rayon::ThreadPool; -use crate::Index; +use crate::{Index, Result}; use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets}; pub struct UpdateBuilder<'a> { @@ -76,7 +76,7 @@ impl<'a> UpdateBuilder<'a> { self, wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index, - ) -> anyhow::Result> + ) -> Result> { DeleteDocuments::new(wtxn, index, self.update_id) } diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 0544f8789..a2197b28c 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -5,6 +5,7 @@ use fst::Streamer; use grenad::CompressionType; use heed::types::ByteSlice; +use crate::Result; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{ create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database, @@ -33,7 +34,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { } } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { // Clear the word prefix docids database. 
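// --- Illustrative aside (not part of this patch) ---
// In the `update_criteria` hunk above, `name.parse()?` works with a plain `?`
// because `Criterion::from_str` now returns the crate `Error`, so an unknown
// name surfaces as a typed user error rather than an ad-hoc string. A
// hypothetical caller, assuming `crate::Result` and `crate::Criterion` are in
// scope as introduced by this patch:
fn parse_criteria(names: &[&str]) -> crate::Result<Vec<crate::Criterion>> {
    // `collect` stops at the first invalid criterion name and returns its error.
    names.iter().map(|name| name.parse()).collect()
}
// --- end of aside ---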
self.index.word_prefix_docids.clear(self.wtxn)?; diff --git a/milli/src/update/word_prefix_pair_proximity_docids.rs b/milli/src/update/word_prefix_pair_proximity_docids.rs index c6b935e54..9019b26e5 100644 --- a/milli/src/update/word_prefix_pair_proximity_docids.rs +++ b/milli/src/update/word_prefix_pair_proximity_docids.rs @@ -7,7 +7,7 @@ use heed::BytesEncode; use heed::types::ByteSlice; use log::debug; -use crate::Index; +use crate::{Index, Result}; use crate::heed_codec::StrStrU8Codec; use crate::update::index_documents::{ WriteMethod, create_sorter, sorter_into_lmdb_database, @@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> { } } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?; diff --git a/milli/src/update/words_level_positions.rs b/milli/src/update/words_level_positions.rs index f94507aab..e2e3f7b4c 100644 --- a/milli/src/update/words_level_positions.rs +++ b/milli/src/update/words_level_positions.rs @@ -11,7 +11,9 @@ use heed::{BytesEncode, Error}; use log::debug; use roaring::RoaringBitmap; +use crate::error::InternalError; use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec}; +use crate::Result; use crate::update::index_documents::WriteMethod; use crate::update::index_documents::{ create_writer, create_sorter, writer_into_reader, write_into_lmdb_database, @@ -56,7 +58,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { debug!("Computing and writing the word levels positions docids into LMDB on disk..."); let entries = compute_positions_levels( @@ -78,7 +80,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.wtxn, *self.index.word_level_position_docids.as_polymorph(), entries, - |_, _| anyhow::bail!("invalid word level position merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "word level position" }), WriteMethod::Append, )?; @@ -142,7 +144,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> { self.wtxn, *self.index.word_prefix_level_position_docids.as_polymorph(), entries, - |_, _| anyhow::bail!("invalid word prefix level position merging"), + |_, _| Err(InternalError::IndexingMergingKeys { process: "word prefix level position" }), WriteMethod::Append, )?; @@ -174,7 +176,7 @@ fn compute_positions_levels( shrink_size: Option, level_group_size: NonZeroU32, min_level_size: NonZeroU32, -) -> anyhow::Result> +) -> Result> { // It is forbidden to keep a cursor and write in a database at the same time with LMDB // therefore we write the facet levels entries into a grenad file before transfering them. 
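// --- Illustrative aside (not part of this patch) ---
// The `write_into_lmdb_database` calls above pass `WriteMethod::Append` together
// with a merge callback that only returns
// `InternalError::IndexingMergingKeys { process: ... }`: with append-only writes
// every key is expected to be unique, so the callback is effectively an
// assertion that now produces a typed error instead of `anyhow::bail!`. A
// hypothetical equivalent, with simplified types:
fn never_merge(_key: &[u8], _values: &[Vec<u8>]) -> Result<Vec<u8>, String> {
    // Reaching this merge function means two entries shared a key, which the
    // append-only write path treats as an internal indexing bug.
    Err(String::from("invalid word level position merging"))
}
// --- end of aside ---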
@@ -251,7 +253,7 @@ fn write_level_entry( left: u32, right: u32, ids: &RoaringBitmap, -) -> anyhow::Result<()> +) -> Result<()> { let key = (word, level, left, right); let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?; diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index f53b0ee00..d1aa267b8 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -2,7 +2,7 @@ use std::iter::FromIterator; use std::str; use fst::Streamer; -use crate::{Index, SmallString32}; +use crate::{Index, SmallString32, Result}; pub struct WordsPrefixesFst<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -48,7 +48,7 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { self } - pub fn execute(self) -> anyhow::Result<()> { + pub fn execute(self) -> Result<()> { let words_fst = self.index.words_fst(&self.wtxn)?; let number_of_words = words_fst.len(); let min_number_of_words = (number_of_words as f64 * self.threshold) as usize;
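// --- Illustrative aside (not part of this patch) ---
// A worked example of the threshold computation above: with 1_000 words in the
// words FST and a threshold of 0.1 (chosen here for illustration, not
// necessarily the default), a prefix must appear in at least 100 words to be
// kept in the prefixes FST.
fn min_words_for_prefix(number_of_words: usize, threshold: f64) -> usize {
    (number_of_words as f64 * threshold) as usize
}

#[test]
fn threshold_example() {
    assert_eq!(min_words_for_prefix(1_000, 0.1), 100);
}
// --- end of aside ---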