Use the Error enum everywhere in the project

This commit is contained in:
Kerollmops 2021-06-14 16:46:19 +02:00
parent ca78cb5aca
commit 312c2d1d8e
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
35 changed files with 385 additions and 300 deletions

1
Cargo.lock generated
View File

@ -1377,7 +1377,6 @@ dependencies = [
name = "milli" name = "milli"
version = "0.3.1" version = "0.3.1"
dependencies = [ dependencies = [
"anyhow",
"big_s", "big_s",
"bstr", "bstr",
"byteorder", "byteorder",

View File

@ -5,7 +5,6 @@ authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018" edition = "2018"
[dependencies] [dependencies]
anyhow = "1.0.38"
bstr = "0.2.15" bstr = "0.2.15"
byteorder = "1.4.2" byteorder = "1.4.2"
chrono = { version = "0.4.19", features = ["serde"] } chrono = { version = "0.4.19", features = ["serde"] }

View File

@ -1,11 +1,12 @@
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
use anyhow::{Context, bail};
use regex::Regex; use regex::Regex;
use serde::{Serialize, Deserialize}; use serde::{Serialize, Deserialize};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use crate::error::{Error, UserError};
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| { static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap() Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()
}); });
@ -41,7 +42,7 @@ impl Criterion {
} }
impl FromStr for Criterion { impl FromStr for Criterion {
type Err = anyhow::Error; type Err = Error;
fn from_str(txt: &str) -> Result<Criterion, Self::Err> { fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt { match txt {
@ -51,13 +52,15 @@ impl FromStr for Criterion {
"attribute" => Ok(Criterion::Attribute), "attribute" => Ok(Criterion::Attribute),
"exactness" => Ok(Criterion::Exactness), "exactness" => Ok(Criterion::Exactness),
text => { text => {
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; let caps = ASC_DESC_REGEX.captures(text).ok_or_else(|| {
UserError::InvalidCriterionName { name: text.to_string() }
})?;
let order = caps.get(1).unwrap().as_str(); let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str();
match order { match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())), "asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())),
otherwise => bail!("unknown criterion name: {}", otherwise), text => return Err(UserError::InvalidCriterionName { name: text.to_string() }.into()),
} }
}, },
} }

View File

@ -2,14 +2,14 @@ use std::borrow::Cow;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::Path; use std::path::Path;
use anyhow::Context;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use heed::{Database, PolyDatabase, RoTxn, RwTxn};
use heed::types::*; use heed::types::*;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search}; use crate::{Criterion, default_criteria, FacetDistribution, FieldsDistribution, Search};
use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId}; use crate::{BEU32, DocumentId, ExternalDocumentsIds, FieldId, Result};
use crate::{ use crate::{
BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec,
ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec, ObkvCodec, RoaringBitmapCodec, RoaringBitmapLenCodec, StrLevelPositionCodec, StrStrU8Codec,
@ -84,7 +84,7 @@ pub struct Index {
} }
impl Index { impl Index {
pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> anyhow::Result<Index> { pub fn new<P: AsRef<Path>>(mut options: heed::EnvOpenOptions, path: P) -> Result<Index> {
options.max_dbs(14); options.max_dbs(14);
let env = options.open(path)?; let env = options.open(path)?;
@ -173,7 +173,7 @@ impl Index {
} }
/// Returns the number of documents indexed in the database. /// Returns the number of documents indexed in the database.
pub fn number_of_documents(&self, rtxn: &RoTxn) -> anyhow::Result<u64> { pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> {
let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?; let count = self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, DOCUMENTS_IDS_KEY)?;
Ok(count.unwrap_or_default()) Ok(count.unwrap_or_default())
} }
@ -215,7 +215,7 @@ impl Index {
/// Returns the external documents ids map which associate the external ids /// Returns the external documents ids map which associate the external ids
/// with the internal ids (i.e. `u32`). /// with the internal ids (i.e. `u32`).
pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<ExternalDocumentsIds<'t>> { pub fn external_documents_ids<'t>(&self, rtxn: &'t RoTxn) -> Result<ExternalDocumentsIds<'t>> {
let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?; let hard = self.main.get::<_, Str, ByteSlice>(rtxn, HARD_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?; let soft = self.main.get::<_, Str, ByteSlice>(rtxn, SOFT_EXTERNAL_DOCUMENTS_IDS_KEY)?;
let hard = match hard { let hard = match hard {
@ -504,7 +504,7 @@ impl Index {
} }
/// Returns the FST which is the words dictionary of the engine. /// Returns the FST which is the words dictionary of the engine.
pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> { pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?),
@ -521,7 +521,7 @@ impl Index {
self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY) self.main.delete::<_, Str>(wtxn, STOP_WORDS_KEY)
} }
pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<Option<fst::Set<&'t [u8]>>> { pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? { match self.main.get::<_, Str, ByteSlice>(rtxn, STOP_WORDS_KEY)? {
Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), Some(bytes) => Ok(Some(fst::Set::new(bytes)?)),
None => Ok(None), None => Ok(None),
@ -555,7 +555,7 @@ impl Index {
} }
/// Returns the FST which is the words prefixes dictionary of the engine. /// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> anyhow::Result<fst::Set<Cow<'t, [u8]>>> { pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? { match self.main.get::<_, Str, ByteSlice>(rtxn, WORDS_PREFIXES_FST_KEY)? {
Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?),
None => Ok(fst::Set::default().map_data(Cow::Owned)?), None => Ok(fst::Set::default().map_data(Cow::Owned)?),
@ -577,13 +577,13 @@ impl Index {
&self, &self,
rtxn: &'t RoTxn, rtxn: &'t RoTxn,
ids: impl IntoIterator<Item=DocumentId>, ids: impl IntoIterator<Item=DocumentId>,
) -> anyhow::Result<Vec<(DocumentId, obkv::KvReader<'t>)>> ) -> Result<Vec<(DocumentId, obkv::KvReader<'t>)>>
{ {
let mut documents = Vec::new(); let mut documents = Vec::new();
for id in ids { for id in ids {
let kv = self.documents.get(rtxn, &BEU32::new(id))? let kv = self.documents.get(rtxn, &BEU32::new(id))?
.with_context(|| format!("Could not find document {}", id))?; .ok_or_else(|| UserError::UnknownInternalDocumentId { document_id: id })?;
documents.push((id, kv)); documents.push((id, kv));
} }
@ -594,7 +594,7 @@ impl Index {
pub fn all_documents<'t>( pub fn all_documents<'t>(
&self, &self,
rtxn: &'t RoTxn, rtxn: &'t RoTxn,
) -> anyhow::Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReader<'t>)>>> { ) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReader<'t>)>>> {
Ok(self Ok(self
.documents .documents
.iter(rtxn)? .iter(rtxn)?

View File

@ -15,12 +15,13 @@ pub mod update;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use std::result::Result as StdResult;
use anyhow::Context;
use fxhash::{FxHasher32, FxHasher64}; use fxhash::{FxHasher32, FxHasher64};
use serde_json::{Map, Value}; use serde_json::{Map, Value};
pub use self::criterion::{Criterion, default_criteria}; pub use self::criterion::{Criterion, default_criteria};
pub use self::error::Error;
pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fields_ids_map::FieldsIdsMap; pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec}; pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec, ObkvCodec, FieldIdWordCountCodec};
@ -30,6 +31,8 @@ pub use self::index::Index;
pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords}; pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
pub use self::tree_level::TreeLevel; pub use self::tree_level::TreeLevel;
pub type Result<T> = std::result::Result<T, error::Error>;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>; pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type SmallString32 = smallstr::SmallString<[u8; 32]>; pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
@ -44,21 +47,24 @@ pub type FieldId = u8;
pub type Position = u32; pub type Position = u32;
pub type FieldsDistribution = HashMap<String, u64>; pub type FieldsDistribution = HashMap<String, u64>;
type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> anyhow::Result<Vec<u8>>; type MergeFn<E> = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Vec<u8>, E>;
/// Transform a raw obkv store into a JSON Object. /// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json( pub fn obkv_to_json(
displayed_fields: &[FieldId], displayed_fields: &[FieldId],
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
obkv: obkv::KvReader, obkv: obkv::KvReader,
) -> anyhow::Result<Map<String, Value>> ) -> Result<Map<String, Value>>
{ {
displayed_fields.iter() displayed_fields.iter()
.copied() .copied()
.flat_map(|id| obkv.get(id).map(|value| (id, value))) .flat_map(|id| obkv.get(id).map(|value| (id, value)))
.map(|(id, value)| { .map(|(id, value)| {
let name = fields_ids_map.name(id).context("unknown obkv field id")?; let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
let value = serde_json::from_slice(value)?; field_id: id,
from_db_name: "documents",
})?;
let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
Ok((name.to_owned(), value)) Ok((name.to_owned(), value))
}) })
.collect() .collect()

View File

@ -1,15 +1,15 @@
use std::mem::take; use std::mem::take;
use anyhow::Context;
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::FieldIdMapMissingEntry;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::FacetIter; use crate::search::facet::FacetIter;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::{FieldId, Index}; use crate::{FieldId, Index, Result};
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
/// Threshold on the number of candidates that will make /// Threshold on the number of candidates that will make
@ -36,7 +36,7 @@ impl<'t> AscDesc<'t> {
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
field_name: String, field_name: String,
) -> anyhow::Result<Self> { ) -> Result<Self> {
Self::new(index, rtxn, parent, field_name, true) Self::new(index, rtxn, parent, field_name, true)
} }
@ -45,7 +45,7 @@ impl<'t> AscDesc<'t> {
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
field_name: String, field_name: String,
) -> anyhow::Result<Self> { ) -> Result<Self> {
Self::new(index, rtxn, parent, field_name, false) Self::new(index, rtxn, parent, field_name, false)
} }
@ -55,11 +55,14 @@ impl<'t> AscDesc<'t> {
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
field_name: String, field_name: String,
ascending: bool, ascending: bool,
) -> anyhow::Result<Self> { ) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map let field_id = fields_ids_map
.id(&field_name) .id(&field_name)
.with_context(|| format!("field {:?} isn't registered", field_name))?; .ok_or_else(|| FieldIdMapMissingEntry::FieldName {
field_name: field_name.clone(),
from_db_name: "asc-desc",
})?;
Ok(AscDesc { Ok(AscDesc {
index, index,
@ -79,7 +82,7 @@ impl<'t> AscDesc<'t> {
impl<'t> Criterion for AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> {
#[logging_timer::time("AscDesc::{}")] #[logging_timer::time("AscDesc::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
self.allowed_candidates -= params.excluded_candidates; self.allowed_candidates -= params.excluded_candidates;
@ -162,7 +165,7 @@ fn facet_ordered<'t>(
field_id: FieldId, field_id: FieldId,
ascending: bool, ascending: bool,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> { ) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
if candidates.len() <= CANDIDATES_THRESHOLD { if candidates.len() <= CANDIDATES_THRESHOLD {
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>) Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
@ -186,7 +189,7 @@ fn iterative_facet_ordered_iter<'t>(
field_id: FieldId, field_id: FieldId,
ascending: bool, ascending: bool,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> anyhow::Result<impl Iterator<Item = RoaringBitmap> + 't> { ) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
let mut docids_values = Vec::with_capacity(candidates.len() as usize); let mut docids_values = Vec::with_capacity(candidates.len() as usize);
for docid in candidates.iter() { for docid in candidates.iter() {
let left = (field_id, docid, f64::MIN); let left = (field_id, docid, f64::MIN);

View File

@ -5,7 +5,7 @@ use std::mem::take;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{TreeLevel, search::build_dfa}; use crate::{TreeLevel, Result, search::build_dfa};
use crate::search::criteria::Query; use crate::search::criteria::Query;
use crate::search::query_tree::{Operation, QueryKind}; use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, WordDerivationsCache};
@ -48,7 +48,7 @@ impl<'t> Attribute<'t> {
impl<'t> Criterion for Attribute<'t> { impl<'t> Criterion for Attribute<'t> {
#[logging_timer::time("Attribute::{}")] #[logging_timer::time("Attribute::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some((_, _, allowed_candidates)) = self.state.as_mut() { if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*allowed_candidates -= params.excluded_candidates; *allowed_candidates -= params.excluded_candidates;
@ -224,7 +224,12 @@ struct QueryLevelIterator<'t, 'q> {
} }
impl<'t, 'q> QueryLevelIterator<'t, 'q> { impl<'t, 'q> QueryLevelIterator<'t, 'q> {
fn new(ctx: &'t dyn Context<'t>, queries: &'q [Query], wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<Self>> { fn new(
ctx: &'t dyn Context<'t>,
queries: &'q [Query],
wdcache: &mut WordDerivationsCache,
) -> Result<Option<Self>>
{
let mut inner = Vec::with_capacity(queries.len()); let mut inner = Vec::with_capacity(queries.len());
for query in queries { for query in queries {
match &query.kind { match &query.kind {
@ -471,7 +476,7 @@ fn initialize_query_level_iterators<'t, 'q>(
branches: &'q FlattenedQueryTree, branches: &'q FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> { ) -> Result<BinaryHeap<Branch<'t, 'q>>> {
let mut positions = BinaryHeap::with_capacity(branches.len()); let mut positions = BinaryHeap::with_capacity(branches.len());
for branch in branches { for branch in branches {
@ -521,7 +526,7 @@ fn set_compute_candidates<'t>(
branches: &FlattenedQueryTree, branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Option<RoaringBitmap>> ) -> Result<Option<RoaringBitmap>>
{ {
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?; let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
let lowest_level = TreeLevel::min_value(); let lowest_level = TreeLevel::min_value();
@ -573,7 +578,7 @@ fn linear_compute_candidates(
ctx: &dyn Context, ctx: &dyn Context,
branches: &FlattenedQueryTree, branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>> ) -> Result<BTreeMap<u64, RoaringBitmap>>
{ {
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 { fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
let mut min_rank = u64::max_value(); let mut min_rank = u64::max_value();

View File

@ -14,7 +14,7 @@ use crate::search::criteria::{
CriterionResult, CriterionResult,
resolve_query_tree, resolve_query_tree,
}; };
use crate::TreeLevel; use crate::{TreeLevel, Result};
pub struct Exactness<'t> { pub struct Exactness<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
@ -45,7 +45,7 @@ impl<'t> Exactness<'t> {
impl<'t> Criterion for Exactness<'t> { impl<'t> Criterion for Exactness<'t> {
#[logging_timer::time("Exactness::{}")] #[logging_timer::time("Exactness::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some(state) = self.state.as_mut() { if let Some(state) = self.state.as_mut() {
state.difference_with(params.excluded_candidates); state.difference_with(params.excluded_candidates);
@ -158,7 +158,7 @@ fn resolve_state(
ctx: &dyn Context, ctx: &dyn Context,
state: State, state: State,
query: &[ExactQueryPart], query: &[ExactQueryPart],
) -> anyhow::Result<(RoaringBitmap, Option<State>)> ) -> Result<(RoaringBitmap, Option<State>)>
{ {
use State::*; use State::*;
match state { match state {

View File

@ -1,6 +1,7 @@
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::Result;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache; use crate::search::WordDerivationsCache;
use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context}; use super::{resolve_query_tree, Criterion, CriterionResult, CriterionParameters, Context};
@ -29,7 +30,7 @@ impl<'t> Final<'t> {
} }
#[logging_timer::time("Final::{}")] #[logging_timer::time("Final::{}")]
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> { pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result<Option<FinalResult>> {
debug!("Final iteration"); debug!("Final iteration");
let excluded_candidates = &self.returned_candidates | excluded_candidates; let excluded_candidates = &self.returned_candidates | excluded_candidates;
let mut criterion_parameters = CriterionParameters { let mut criterion_parameters = CriterionParameters {

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::Result;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use super::{Criterion, CriterionResult, CriterionParameters}; use super::{Criterion, CriterionResult, CriterionParameters};
pub struct Initial { pub struct Initial {
@ -22,7 +22,7 @@ impl Initial {
impl Criterion for Initial { impl Criterion for Initial {
#[logging_timer::time("Initial::{}")] #[logging_timer::time("Initial::{}")]
fn next(&mut self, _: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, _: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
Ok(self.answer.take()) Ok(self.answer.take())
} }
} }

View File

@ -4,7 +4,7 @@ use std::borrow::Cow;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}};
use crate::{Index, DocumentId}; use crate::{Index, DocumentId, Result};
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use self::asc_desc::AscDesc; use self::asc_desc::AscDesc;
@ -26,7 +26,7 @@ mod words;
pub mod r#final; pub mod r#final;
pub trait Criterion { pub trait Criterion {
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>>; fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>>;
} }
/// The result of a call to the parent criterion. /// The result of a call to the parent criterion.
@ -78,8 +78,9 @@ pub trait Context<'c> {
fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>; fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>;
fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>>; fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>>;
fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>; fn field_id_word_count_docids(&self, field_id: FieldId, word_count: u8) -> heed::Result<Option<RoaringBitmap>>;
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error>; fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>>;
} }
pub struct CriteriaBuilder<'t> { pub struct CriteriaBuilder<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
index: &'t Index, index: &'t Index,
@ -185,14 +186,14 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
self.index.field_id_word_count_docids.get(self.rtxn, &key) self.index.field_id_word_count_docids.get(self.rtxn, &key)
} }
fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> { fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> heed::Result<Option<RoaringBitmap>> {
let key = (word, level, left, right); let key = (word, level, left, right);
self.index.word_level_position_docids.get(self.rtxn, &key) self.index.word_level_position_docids.get(self.rtxn, &key)
} }
} }
impl<'t> CriteriaBuilder<'t> { impl<'t> CriteriaBuilder<'t> {
pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> anyhow::Result<Self> { pub fn new(rtxn: &'t heed::RoTxn<'t>, index: &'t Index) -> Result<Self> {
let words_fst = index.words_fst(rtxn)?; let words_fst = index.words_fst(rtxn)?;
let words_prefixes_fst = index.words_prefixes_fst(rtxn)?; let words_prefixes_fst = index.words_prefixes_fst(rtxn)?;
Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
@ -203,7 +204,7 @@ impl<'t> CriteriaBuilder<'t> {
query_tree: Option<Operation>, query_tree: Option<Operation>,
primitive_query: Option<Vec<PrimitiveQueryPart>>, primitive_query: Option<Vec<PrimitiveQueryPart>>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
) -> anyhow::Result<Final<'t>> ) -> Result<Final<'t>>
{ {
use crate::criterion::Criterion as Name; use crate::criterion::Criterion as Name;
@ -230,13 +231,13 @@ pub fn resolve_query_tree<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
query_tree: &Operation, query_tree: &Operation,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
fn resolve_operation<'t>( fn resolve_operation<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
query_tree: &Operation, query_tree: &Operation,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
use Operation::{And, Phrase, Or, Query}; use Operation::{And, Phrase, Or, Query};
@ -244,7 +245,7 @@ pub fn resolve_query_tree<'t>(
And(ops) => { And(ops) => {
let mut ops = ops.iter().map(|op| { let mut ops = ops.iter().map(|op| {
resolve_operation(ctx, op, wdcache) resolve_operation(ctx, op, wdcache)
}).collect::<anyhow::Result<Vec<_>>>()?; }).collect::<Result<Vec<_>>>()?;
ops.sort_unstable_by_key(|cds| cds.len()); ops.sort_unstable_by_key(|cds| cds.len());
@ -302,7 +303,7 @@ fn all_word_pair_proximity_docids<T: AsRef<str>, U: AsRef<str>>(
left_words: &[(T, u8)], left_words: &[(T, u8)],
right_words: &[(U, u8)], right_words: &[(U, u8)],
proximity: u8 proximity: u8
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (left, _l_typo) in left_words { for (left, _l_typo) in left_words {
@ -318,7 +319,7 @@ fn query_docids(
ctx: &dyn Context, ctx: &dyn Context,
query: &Query, query: &Query,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
match &query.kind { match &query.kind {
QueryKind::Exact { word, .. } => { QueryKind::Exact { word, .. } => {
@ -354,7 +355,7 @@ fn query_pair_proximity_docids(
right: &Query, right: &Query,
proximity: u8, proximity: u8,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
if proximity >= 8 { if proximity >= 8 {
let mut candidates = query_docids(ctx, left, wdcache)?; let mut candidates = query_docids(ctx, left, wdcache)?;
@ -481,7 +482,7 @@ pub mod test {
todo!() todo!()
} }
fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> Result<Option<RoaringBitmap>, heed::Error> { fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> heed::Result<Option<RoaringBitmap>> {
todo!() todo!()
} }

View File

@ -5,9 +5,10 @@ use std::mem::take;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use log::debug; use log::debug;
use crate::{DocumentId, Position, search::{query_tree::QueryKind}};
use crate::search::query_tree::{maximum_proximity, Operation, Query}; use crate::search::query_tree::{maximum_proximity, Operation, Query};
use crate::search::{build_dfa, WordDerivationsCache}; use crate::search::{build_dfa, WordDerivationsCache};
use crate::search::{query_tree::QueryKind};
use crate::{DocumentId, Position, Result};
use super::{ use super::{
Context, Context,
Criterion, Criterion,
@ -55,7 +56,7 @@ impl<'t> Proximity<'t> {
impl<'t> Criterion for Proximity<'t> { impl<'t> Criterion for Proximity<'t> {
#[logging_timer::time("Proximity::{}")] #[logging_timer::time("Proximity::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some((_, _, allowed_candidates)) = self.state.as_mut() { if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*allowed_candidates -= params.excluded_candidates; *allowed_candidates -= params.excluded_candidates;
@ -161,7 +162,7 @@ fn resolve_candidates<'t>(
proximity: u8, proximity: u8,
cache: &mut Cache, cache: &mut Cache,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
fn resolve_operation<'t>( fn resolve_operation<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
@ -169,7 +170,7 @@ fn resolve_candidates<'t>(
proximity: u8, proximity: u8,
cache: &mut Cache, cache: &mut Cache,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> ) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{ {
use Operation::{And, Phrase, Or}; use Operation::{And, Phrase, Or};
@ -227,7 +228,7 @@ fn resolve_candidates<'t>(
proximity: u8, proximity: u8,
cache: &mut Cache, cache: &mut Cache,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> ) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{ {
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> { fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
(0..=mana.min(left_max)).map(move |m| (m, mana - m)) (0..=mana.min(left_max)).map(move |m| (m, mana - m))
@ -281,7 +282,7 @@ fn resolve_candidates<'t>(
proximity: u8, proximity: u8,
cache: &mut Cache, cache: &mut Cache,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> ) -> Result<Vec<(Query, Query, RoaringBitmap)>>
{ {
// Extract the first two elements but gives the tail // Extract the first two elements but gives the tail
// that is just after the first element. // that is just after the first element.
@ -324,13 +325,13 @@ fn resolve_plane_sweep_candidates(
query_tree: &Operation, query_tree: &Operation,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> ) -> Result<BTreeMap<u8, RoaringBitmap>>
{ {
/// FIXME may be buggy with query like "new new york" /// FIXME may be buggy with query like "new new york"
fn plane_sweep( fn plane_sweep(
groups_positions: Vec<Vec<(Position, u8, Position)>>, groups_positions: Vec<Vec<(Position, u8, Position)>>,
consecutive: bool, consecutive: bool,
) -> anyhow::Result<Vec<(Position, u8, Position)>> ) -> Result<Vec<(Position, u8, Position)>>
{ {
fn compute_groups_proximity( fn compute_groups_proximity(
groups: &[(usize, (Position, u8, Position))], groups: &[(usize, (Position, u8, Position))],
@ -451,7 +452,7 @@ fn resolve_plane_sweep_candidates(
rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
words_positions: &HashMap<String, RoaringBitmap>, words_positions: &HashMap<String, RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Vec<(Position, u8, Position)>> ) -> Result<Vec<(Position, u8, Position)>>
{ {
use Operation::{And, Phrase, Or}; use Operation::{And, Phrase, Or};

View File

@ -5,6 +5,7 @@ use roaring::RoaringBitmap;
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, WordDerivationsCache};
use crate::Result;
use super::{ use super::{
Candidates, Candidates,
Context, Context,
@ -43,7 +44,7 @@ impl<'t> Typo<'t> {
impl<'t> Criterion for Typo<'t> { impl<'t> Criterion for Typo<'t> {
#[logging_timer::time("Typo::{}")] #[logging_timer::time("Typo::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
use Candidates::{Allowed, Forbidden}; use Candidates::{Allowed, Forbidden};
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
match self.state.as_mut() { match self.state.as_mut() {
@ -163,14 +164,14 @@ fn alterate_query_tree(
mut query_tree: Operation, mut query_tree: Operation,
number_typos: u8, number_typos: u8,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Operation> ) -> Result<Operation>
{ {
fn recurse( fn recurse(
words_fst: &fst::Set<Cow<[u8]>>, words_fst: &fst::Set<Cow<[u8]>>,
operation: &mut Operation, operation: &mut Operation,
number_typos: u8, number_typos: u8,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<()> ) -> Result<()>
{ {
use Operation::{And, Phrase, Or}; use Operation::{And, Phrase, Or};
@ -218,7 +219,7 @@ fn resolve_candidates<'t>(
number_typos: u8, number_typos: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>, cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
fn resolve_operation<'t>( fn resolve_operation<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
@ -226,7 +227,7 @@ fn resolve_candidates<'t>(
number_typos: u8, number_typos: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>, cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
use Operation::{And, Phrase, Or, Query}; use Operation::{And, Phrase, Or, Query};
@ -277,7 +278,7 @@ fn resolve_candidates<'t>(
mana: u8, mana: u8,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>, cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
match branches.split_first() { match branches.split_first() {
Some((head, [])) => { Some((head, [])) => {

View File

@ -4,6 +4,7 @@ use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::Result;
use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree}; use super::{Context, Criterion, CriterionParameters, CriterionResult, resolve_query_tree};
pub struct Words<'t> { pub struct Words<'t> {
@ -30,7 +31,7 @@ impl<'t> Words<'t> {
impl<'t> Criterion for Words<'t> { impl<'t> Criterion for Words<'t> {
#[logging_timer::time("Words::{}")] #[logging_timer::time("Words::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some(candidates) = self.candidates.as_mut() { if let Some(candidates) = self.candidates.as_mut() {
*candidates -= params.excluded_candidates; *candidates -= params.excluded_candidates;

View File

@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
use super::{Distinct, DocIter}; use super::{Distinct, DocIter};
use crate::heed_codec::facet::*; use crate::heed_codec::facet::*;
use crate::{DocumentId, FieldId, Index}; use crate::{DocumentId, FieldId, Index, Result};
const FID_SIZE: usize = size_of::<FieldId>(); const FID_SIZE: usize = size_of::<FieldId>();
const DOCID_SIZE: usize = size_of::<DocumentId>(); const DOCID_SIZE: usize = size_of::<DocumentId>();
@ -57,7 +57,7 @@ impl<'a> FacetDistinctIter<'a> {
.get(self.txn, &(self.distinct, 0, key, key)) .get(self.txn, &(self.distinct, 0, key, key))
} }
fn distinct_string(&mut self, id: DocumentId) -> anyhow::Result<()> { fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
let iter = facet_string_values(id, self.distinct, self.index, self.txn)?; let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;
for item in iter { for item in iter {
@ -73,7 +73,7 @@ impl<'a> FacetDistinctIter<'a> {
Ok(()) Ok(())
} }
fn distinct_number(&mut self, id: DocumentId) -> anyhow::Result<()> { fn distinct_number(&mut self, id: DocumentId) -> Result<()> {
let iter = facet_number_values(id, self.distinct, self.index, self.txn)?; let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;
for item in iter { for item in iter {
@ -92,7 +92,7 @@ impl<'a> FacetDistinctIter<'a> {
/// Performs the next iteration of the facet distinct. This is a convenience method that is /// Performs the next iteration of the facet distinct. This is a convenience method that is
/// called by the Iterator::next implementation that transposes the result. It makes error /// called by the Iterator::next implementation that transposes the result. It makes error
/// handling easier. /// handling easier.
fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> { fn next_inner(&mut self) -> Result<Option<DocumentId>> {
// The first step is to remove all the excluded documents from our candidates // The first step is to remove all the excluded documents from our candidates
self.candidates.difference_with(&self.excluded); self.candidates.difference_with(&self.excluded);
@ -129,7 +129,7 @@ fn facet_number_values<'a>(
distinct: FieldId, distinct: FieldId,
index: &Index, index: &Index,
txn: &'a heed::RoTxn, txn: &'a heed::RoTxn,
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> { ) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, heed::types::Unit>> {
let key = facet_values_prefix_key(distinct, id); let key = facet_values_prefix_key(distinct, id);
let iter = index let iter = index
@ -146,7 +146,7 @@ fn facet_string_values<'a>(
distinct: FieldId, distinct: FieldId,
index: &Index, index: &Index,
txn: &'a heed::RoTxn, txn: &'a heed::RoTxn,
) -> anyhow::Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> { ) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, heed::types::Unit>> {
let key = facet_values_prefix_key(distinct, id); let key = facet_values_prefix_key(distinct, id);
let iter = index let iter = index
@ -159,7 +159,7 @@ fn facet_string_values<'a>(
} }
impl Iterator for FacetDistinctIter<'_> { impl Iterator for FacetDistinctIter<'_> {
type Item = anyhow::Result<DocumentId>; type Item = Result<DocumentId>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.next_inner().transpose() self.next_inner().transpose()

View File

@ -3,13 +3,13 @@ mod noop_distinct;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::DocumentId; use crate::{DocumentId, Result};
pub use facet_distinct::FacetDistinct; pub use facet_distinct::FacetDistinct;
pub use noop_distinct::NoopDistinct; pub use noop_distinct::NoopDistinct;
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`. /// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
/// It provides a way to get back the ownership to the excluded set. /// It provides a way to get back the ownership to the excluded set.
pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> { pub trait DocIter: Iterator<Item = Result<DocumentId>> {
/// Returns ownership of the internal excluded set. /// Returns ownership of the internal excluded set.
fn into_excluded(self) -> RoaringBitmap; fn into_excluded(self) -> RoaringBitmap;
} }
@ -106,7 +106,7 @@ mod test {
/// Checks that all the candidates are distinct, and returns the candidates number. /// Checks that all the candidates are distinct, and returns the candidates number.
pub(crate) fn validate_distinct_candidates( pub(crate) fn validate_distinct_candidates(
candidates: impl Iterator<Item=anyhow::Result<DocumentId>>, candidates: impl Iterator<Item = crate::Result<DocumentId>>,
distinct: FieldId, distinct: FieldId,
index: &Index, index: &Index,
) -> usize { ) -> usize {

View File

@ -1,6 +1,6 @@
use roaring::{RoaringBitmap, bitmap::IntoIter}; use roaring::{RoaringBitmap, bitmap::IntoIter};
use crate::DocumentId; use crate::{DocumentId, Result};
use super::{DocIter, Distinct}; use super::{DocIter, Distinct};
/// A distinct implementer that does not perform any distinct, /// A distinct implementer that does not perform any distinct,
@ -13,7 +13,7 @@ pub struct NoopDistinctIter {
} }
impl Iterator for NoopDistinctIter { impl Iterator for NoopDistinctIter {
type Item = anyhow::Result<DocumentId>; type Item = Result<DocumentId>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.candidates.next().map(Ok) self.candidates.next().map(Ok)

View File

@ -2,15 +2,15 @@ use std::collections::{HashSet, BTreeMap};
use std::ops::Bound::Unbounded; use std::ops::Bound::Unbounded;
use std::{cmp, fmt}; use std::{cmp, fmt};
use anyhow::Context;
use heed::{Database, BytesDecode}; use heed::{Database, BytesDecode};
use heed::types::{ByteSlice, Unit}; use heed::types::{ByteSlice, Unit};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::FieldIdMapMissingEntry;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetValueStringCodec; use crate::heed_codec::facet::FacetValueStringCodec;
use crate::search::facet::{FacetIter, FacetRange}; use crate::search::facet::{FacetIter, FacetRange};
use crate::{Index, FieldId, DocumentId}; use crate::{Index, FieldId, DocumentId, Result};
/// The default number of values by facets that will /// The default number of values by facets that will
/// be fetched from the key-value store. /// be fetched from the key-value store.
@ -195,14 +195,15 @@ impl<'a> FacetDistribution<'a> {
} }
} }
pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let mut distribution = BTreeMap::new(); let mut distribution = BTreeMap::new();
for name in filterable_fields { for name in filterable_fields {
let fid = fields_ids_map.id(&name).with_context(|| { let fid = fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName {
format!("missing field name {:?} from the fields id map", name) field_name: name.clone(),
from_db_name: "filterable-fields",
})?; })?;
let values = self.facet_values(fid)?; let values = self.facet_values(fid)?;
distribution.insert(name, values); distribution.insert(name, values);

View File

@ -1,6 +1,7 @@
use std::collections::HashSet; use std::collections::HashSet;
use std::fmt::Debug; use std::fmt::Debug;
use std::ops::Bound::{self, Included, Excluded}; use std::ops::Bound::{self, Included, Excluded};
use std::result::Result as StdResult;
use std::str::FromStr; use std::str::FromStr;
use either::Either; use either::Either;
@ -11,8 +12,9 @@ use pest::iterators::{Pair, Pairs};
use pest::Parser; use pest::Parser;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec}; use crate::{Index, FieldId, FieldsIdsMap, CboRoaringBitmapCodec, Result};
use super::FacetRange; use super::FacetRange;
use super::parser::Rule; use super::parser::Rule;
@ -60,7 +62,7 @@ impl FilterCondition {
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
array: I, array: I,
) -> anyhow::Result<Option<FilterCondition>> ) -> Result<Option<FilterCondition>>
where I: IntoIterator<Item=Either<J, B>>, where I: IntoIterator<Item=Either<J, B>>,
J: IntoIterator<Item=A>, J: IntoIterator<Item=A>,
A: AsRef<str>, A: AsRef<str>,
@ -104,11 +106,11 @@ impl FilterCondition {
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
expression: &str, expression: &str,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields_ids(rtxn)?; let filterable_fields = index.filterable_fields_ids(rtxn)?;
let lexed = FilterParser::parse(Rule::prgm, expression)?; let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::FilterParsing)?;
FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
} }
@ -116,7 +118,7 @@ impl FilterCondition {
fim: &FieldsIdsMap, fim: &FieldsIdsMap,
ff: &HashSet<FieldId>, ff: &HashSet<FieldId>,
expression: Pairs<Rule>, expression: Pairs<Rule>,
) -> anyhow::Result<Self> ) -> Result<Self>
{ {
PREC_CLIMBER.climb( PREC_CLIMBER.climb(
expression, expression,
@ -133,7 +135,7 @@ impl FilterCondition {
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()), Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
_ => unreachable!(), _ => unreachable!(),
}, },
|lhs: anyhow::Result<Self>, op: Pair<Rule>, rhs: anyhow::Result<Self>| { |lhs: Result<Self>, op: Pair<Rule>, rhs: Result<Self>| {
match op.as_rule() { match op.as_rule() {
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))), Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))), Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
@ -158,16 +160,17 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let (lresult, _) = pest_parse(items.next().unwrap()); let (lresult, _) = pest_parse(items.next().unwrap());
let (rresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap());
let lvalue = lresult?; let lvalue = lresult.map_err(UserError::FilterParsing)?;
let rvalue = rresult?; let rvalue = rresult.map_err(UserError::FilterParsing)?;
Ok(Operator(fid, Between(lvalue, rvalue))) Ok(Operator(fid, Between(lvalue, rvalue)))
} }
@ -176,10 +179,11 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, svalue) = pest_parse(value); let (result, svalue) = pest_parse(value);
@ -192,60 +196,68 @@ impl FilterCondition {
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
let value = result.map_err(UserError::FilterParsing)?;
Ok(Operator(fid, GreaterThan(result?))) Ok(Operator(fid, GreaterThan(value)))
} }
fn greater_than_or_equal( fn greater_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
let value = result.map_err(UserError::FilterParsing)?;
Ok(Operator(fid, GreaterThanOrEqual(result?))) Ok(Operator(fid, GreaterThanOrEqual(value)))
} }
fn lower_than( fn lower_than(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
let value = result.map_err(UserError::FilterParsing)?;
Ok(Operator(fid, LowerThan(result?))) Ok(Operator(fid, LowerThan(value)))
} }
fn lower_than_or_equal( fn lower_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FilterCondition> ) -> Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, filterable_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)
.map_err(UserError::FilterParsing)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
let value = result.map_err(UserError::FilterParsing)?;
Ok(Operator(fid, LowerThanOrEqual(result?))) Ok(Operator(fid, LowerThanOrEqual(value)))
} }
} }
@ -260,7 +272,7 @@ impl FilterCondition {
left: Bound<f64>, left: Bound<f64>,
right: Bound<f64>, right: Bound<f64>,
output: &mut RoaringBitmap, output: &mut RoaringBitmap,
) -> anyhow::Result<()> ) -> Result<()>
{ {
match (left, right) { match (left, right) {
// If the request is an exact value we must go directly to the deepest level. // If the request is an exact value we must go directly to the deepest level.
@ -332,7 +344,7 @@ impl FilterCondition {
strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>, strings_db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
field_id: FieldId, field_id: FieldId,
operator: &Operator, operator: &Operator,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
// Make sure we always bound the ranges with the field id and the level, // Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the // as the facets values are all in the same database and prefixed by the
@ -390,7 +402,7 @@ impl FilterCondition {
&self, &self,
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
let numbers_db = index.facet_id_f64_docids; let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids; let strings_db = index.facet_id_string_docids;
@ -422,7 +434,7 @@ fn field_id(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
filterable_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
items: &mut Pairs<Rule>, items: &mut Pairs<Rule>,
) -> Result<FieldId, PestError<Rule>> ) -> StdResult<FieldId, PestError<Rule>>
{ {
// lexing ensures that we at least have a key // lexing ensures that we at least have a key
let key = items.next().unwrap(); let key = items.next().unwrap();
@ -463,7 +475,7 @@ fn field_id(
/// the original string that we tried to parse. /// the original string that we tried to parse.
/// ///
/// Returns the parsing error associated with the span if the conversion fails. /// Returns the parsing error associated with the span if the conversion fails.
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String) fn pest_parse<T>(pair: Pair<Rule>) -> (StdResult<T, pest::error::Error<Rule>>, String)
where T: FromStr, where T: FromStr,
T::Err: ToString, T::Err: ToString,
{ {

View File

@ -9,8 +9,9 @@ use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::{Index, FieldId}; use crate::{Index, FieldId};
pub use self::filter_condition::{FilterCondition, Operator};
pub use self::facet_distribution::FacetDistribution; pub use self::facet_distribution::FacetDistribution;
pub use self::filter_condition::{FilterCondition, Operator};
pub(crate) use self::parser::Rule as ParserRule;
mod filter_condition; mod filter_condition;
mod facet_distribution; mod facet_distribution;

View File

@ -2,6 +2,7 @@ use std::borrow::Cow;
use std::collections::hash_map::{Entry, HashMap}; use std::collections::hash_map::{Entry, HashMap};
use std::fmt; use std::fmt;
use std::mem::take; use std::mem::take;
use std::result::Result as StdResult;
use std::str::Utf8Error; use std::str::Utf8Error;
use std::time::Instant; use std::time::Instant;
@ -14,10 +15,11 @@ use roaring::bitmap::RoaringBitmap;
use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
use crate::search::criteria::r#final::{Final, FinalResult}; use crate::search::criteria::r#final::{Final, FinalResult};
use crate::{Index, DocumentId}; use crate::{Index, DocumentId, Result};
pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator}; pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
pub use self::matching_words::MatchingWords; pub use self::matching_words::MatchingWords;
pub(crate) use self::facet::ParserRule;
use self::query_tree::QueryTreeBuilder; use self::query_tree::QueryTreeBuilder;
// Building these factories is not free. // Building these factories is not free.
@ -93,7 +95,7 @@ impl<'a> Search<'a> {
self self
} }
pub fn execute(&self) -> anyhow::Result<SearchResult> { pub fn execute(&self) -> Result<SearchResult> {
// We create the query tree by splitting the query into tokens. // We create the query tree by splitting the query into tokens.
let before = Instant::now(); let before = Instant::now();
let (query_tree, primitive_query) = match self.query.as_ref() { let (query_tree, primitive_query) = match self.query.as_ref() {
@ -152,7 +154,7 @@ impl<'a> Search<'a> {
mut distinct: D, mut distinct: D,
matching_words: MatchingWords, matching_words: MatchingWords,
mut criteria: Final, mut criteria: Final,
) -> anyhow::Result<SearchResult> ) -> Result<SearchResult>
{ {
let mut offset = self.offset; let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new(); let mut initial_candidates = RoaringBitmap::new();
@ -225,7 +227,7 @@ pub fn word_derivations<'c>(
max_typo: u8, max_typo: u8,
fst: &fst::Set<Cow<[u8]>>, fst: &fst::Set<Cow<[u8]>>,
cache: &'c mut WordDerivationsCache, cache: &'c mut WordDerivationsCache,
) -> Result<&'c [(String, u8)], Utf8Error> { ) -> StdResult<&'c [(String, u8)], Utf8Error> {
match cache.entry((word.to_string(), is_prefix, max_typo)) { match cache.entry((word.to_string(), is_prefix, max_typo)) {
Entry::Occupied(entry) => Ok(entry.into_mut()), Entry::Occupied(entry) => Ok(entry.into_mut()),
Entry::Vacant(entry) => { Entry::Vacant(entry) => {

View File

@ -7,7 +7,7 @@ use meilisearch_tokenizer::TokenKind;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use crate::Index; use crate::{Index, Result};
type IsOptionalWord = bool; type IsOptionalWord = bool;
type IsPrefix = bool; type IsPrefix = bool;
@ -219,7 +219,7 @@ impl<'a> QueryTreeBuilder<'a> {
/// - if `authorize_typos` is set to `false` the query tree will be generated /// - if `authorize_typos` is set to `false` the query tree will be generated
/// forcing all query words to match documents without any typo /// forcing all query words to match documents without any typo
/// (the criterion `typo` will be ignored) /// (the criterion `typo` will be ignored)
pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> { pub fn build(&self, query: TokenStream) -> Result<Option<(Operation, PrimitiveQuery)>> {
let stop_words = self.index.stop_words(self.rtxn)?; let stop_words = self.index.stop_words(self.rtxn)?;
let primitive_query = create_primitive_query(query, stop_words, self.words_limit); let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
if !primitive_query.is_empty() { if !primitive_query.is_empty() {
@ -291,14 +291,14 @@ fn create_query_tree(
optional_words: bool, optional_words: bool,
authorize_typos: bool, authorize_typos: bool,
query: &[PrimitiveQueryPart], query: &[PrimitiveQueryPart],
) -> anyhow::Result<Operation> ) -> Result<Operation>
{ {
/// Matches on the `PrimitiveQueryPart` and create an operation from it. /// Matches on the `PrimitiveQueryPart` and create an operation from it.
fn resolve_primitive_part( fn resolve_primitive_part(
ctx: &impl Context, ctx: &impl Context,
authorize_typos: bool, authorize_typos: bool,
part: PrimitiveQueryPart, part: PrimitiveQueryPart,
) -> anyhow::Result<Operation> ) -> Result<Operation>
{ {
match part { match part {
// 1. try to split word in 2 // 1. try to split word in 2
@ -325,7 +325,7 @@ fn create_query_tree(
ctx: &impl Context, ctx: &impl Context,
authorize_typos: bool, authorize_typos: bool,
query: &[PrimitiveQueryPart], query: &[PrimitiveQueryPart],
) -> anyhow::Result<Operation> ) -> Result<Operation>
{ {
const MAX_NGRAM: usize = 3; const MAX_NGRAM: usize = 3;
let mut op_children = Vec::new(); let mut op_children = Vec::new();
@ -379,7 +379,7 @@ fn create_query_tree(
ctx: &impl Context, ctx: &impl Context,
authorize_typos: bool, authorize_typos: bool,
query: PrimitiveQuery, query: PrimitiveQuery,
) -> anyhow::Result<Operation> ) -> Result<Operation>
{ {
let number_phrases = query.iter().filter(|p| p.is_phrase()).count(); let number_phrases = query.iter().filter(|p| p.is_phrase()).count();
let mut operation_children = Vec::new(); let mut operation_children = Vec::new();
@ -532,7 +532,7 @@ mod test {
authorize_typos: bool, authorize_typos: bool,
words_limit: Option<usize>, words_limit: Option<usize>,
query: TokenStream, query: TokenStream,
) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> ) -> Result<Option<(Operation, PrimitiveQuery)>>
{ {
let primitive_query = create_primitive_query(query, None, words_limit); let primitive_query = create_primitive_query(query, None, words_limit);
if !primitive_query.is_empty() { if !primitive_query.is_empty() {

View File

@ -1,6 +1,7 @@
use chrono::Utc; use chrono::Utc;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{ExternalDocumentsIds, Index, FieldsDistribution};
use crate::{ExternalDocumentsIds, Index, FieldsDistribution, Result};
pub struct ClearDocuments<'t, 'u, 'i> { pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -18,7 +19,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
ClearDocuments { wtxn, index, _update_id: update_id } ClearDocuments { wtxn, index, _update_id: update_id }
} }
pub fn execute(self) -> anyhow::Result<u64> { pub fn execute(self) -> Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?; self.index.set_updated_at(self.wtxn, &Utc::now())?;
let Index { let Index {
env: _env, env: _env,

View File

@ -1,15 +1,15 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use anyhow::{anyhow, Context};
use chrono::Utc; use chrono::Utc;
use fst::IntoStreamer; use fst::IntoStreamer;
use heed::types::{ByteSlice, Unit}; use heed::types::{ByteSlice, Unit};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde_json::Value; use serde_json::Value;
use crate::error::{InternalError, UserError};
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds}; use crate::{Index, DocumentId, FieldId, BEU32, SmallString32, ExternalDocumentsIds, Result};
use super::ClearDocuments; use super::ClearDocuments;
pub struct DeleteDocuments<'t, 'u, 'i> { pub struct DeleteDocuments<'t, 'u, 'i> {
@ -25,7 +25,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index, index: &'i Index,
update_id: u64, update_id: u64,
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>> ) -> Result<DeleteDocuments<'t, 'u, 'i>>
{ {
let external_documents_ids = index let external_documents_ids = index
.external_documents_ids(wtxn)? .external_documents_ids(wtxn)?
@ -54,7 +54,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
Some(docid) Some(docid)
} }
pub fn execute(self) -> anyhow::Result<u64> { pub fn execute(self) -> Result<u64> {
self.index.set_updated_at(self.wtxn, &Utc::now())?; self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We retrieve the current documents ids that are in the database. // We retrieve the current documents ids that are in the database.
let mut documents_ids = self.index.documents_ids(self.wtxn)?; let mut documents_ids = self.index.documents_ids(self.wtxn)?;
@ -77,7 +77,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
} }
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let primary_key = self.index.primary_key(self.wtxn)?.context("missing primary key")?; let primary_key = self.index.primary_key(self.wtxn)?.ok_or_else(|| {
InternalError::DatabaseMissingEntry { db_name: "main", key: Some("primary-key") }
})?;
let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#); let id_field = fields_ids_map.id(primary_key).expect(r#"the field "id" to be present"#);
let Index { let Index {
@ -119,7 +121,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
let external_id = match serde_json::from_slice(content).unwrap() { let external_id = match serde_json::from_slice(content).unwrap() {
Value::String(string) => SmallString32::from(string.as_str()), Value::String(string) => SmallString32::from(string.as_str()),
Value::Number(number) => SmallString32::from(number.to_string()), Value::Number(number) => SmallString32::from(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")), document_id => return Err(UserError::InvalidDocumentId { document_id }.into()),
}; };
external_ids.push(external_id); external_ids.push(external_id);
} }

View File

@ -9,11 +9,12 @@ use heed::{BytesEncode, Error};
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::InternalError;
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::Index;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database}; use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
use crate::{Index, Result};
pub struct Facets<'t, 'u, 'i> { pub struct Facets<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -55,7 +56,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
self self
} }
pub fn execute(self) -> anyhow::Result<()> { pub fn execute(self) -> Result<()> {
self.index.set_updated_at(self.wtxn, &Utc::now())?; self.index.set_updated_at(self.wtxn, &Utc::now())?;
// We get the faceted fields to be able to create the facet levels. // We get the faceted fields to be able to create the facet levels.
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
@ -102,7 +103,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
self.wtxn, self.wtxn,
*self.index.facet_id_f64_docids.as_polymorph(), *self.index.facet_id_f64_docids.as_polymorph(),
content, content,
|_, _| anyhow::bail!("invalid facet number level merging"), |_, _| Err(InternalError::IndexingMergingKeys { process: "facet number level" }),
WriteMethod::GetMergePut, WriteMethod::GetMergePut,
)?; )?;
} }
@ -132,7 +133,7 @@ fn compute_facet_number_levels<'t>(
level_group_size: NonZeroUsize, level_group_size: NonZeroUsize,
min_level_size: NonZeroUsize, min_level_size: NonZeroUsize,
field_id: u8, field_id: u8,
) -> anyhow::Result<Reader<FileFuse>> ) -> Result<Reader<FileFuse>>
{ {
let first_level_size = db let first_level_size = db
.remap_key_type::<ByteSlice>() .remap_key_type::<ByteSlice>()
@ -195,7 +196,7 @@ fn compute_faceted_documents_ids(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>, db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8, field_id: u8,
) -> anyhow::Result<RoaringBitmap> ) -> Result<RoaringBitmap>
{ {
let mut documents_ids = RoaringBitmap::new(); let mut documents_ids = RoaringBitmap::new();
@ -214,7 +215,7 @@ fn write_number_entry(
left: f64, left: f64,
right: f64, right: f64,
ids: &RoaringBitmap, ids: &RoaringBitmap,
) -> anyhow::Result<()> ) -> Result<()>
{ {
let key = (field_id, level, left, right); let key = (field_id, level, left, right);
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?; let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;

View File

@ -1,17 +1,19 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::result::Result as StdResult;
use fst::IntoStreamer; use fst::IntoStreamer;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::Result;
/// Only the last value associated with an id is kept. /// Only the last value associated with an id is kept.
pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn keep_latest_obkv(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
Ok(obkvs.last().unwrap().clone().into_owned()) Ok(obkvs.last().unwrap().clone().into_owned())
} }
/// Merge all the obks in the order we see them. /// Merge all the obks in the order we see them.
pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let mut iter = obkvs.iter(); let mut iter = obkvs.iter();
let first = iter.next().map(|b| b.clone().into_owned()).unwrap(); let first = iter.next().map(|b| b.clone().into_owned()).unwrap();
Ok(iter.fold(first, |acc, current| { Ok(iter.fold(first, |acc, current| {
@ -24,8 +26,8 @@ pub fn merge_obkvs(_key: &[u8], obkvs: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>>
} }
// Union of multiple FSTs // Union of multiple FSTs
pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let fsts = values.iter().map(fst::Set::new).collect::<Result<Vec<_>, _>>()?; let fsts = values.iter().map(fst::Set::new).collect::<StdResult<Vec<_>, _>>()?;
let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect(); let op_builder: fst::set::OpBuilder = fsts.iter().map(|fst| fst.into_stream()).collect();
let op = op_builder.r#union(); let op = op_builder.r#union();
@ -34,7 +36,7 @@ pub fn fst_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
Ok(build.into_inner().unwrap()) Ok(build.into_inner().unwrap())
} }
pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
Ok(values.first().unwrap().to_vec()) Ok(values.first().unwrap().to_vec())
} }
@ -54,7 +56,7 @@ pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mu
writer.finish().unwrap(); writer.finish().unwrap();
} }
pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap(); let (head, tail) = values.split_first().unwrap();
let mut head = RoaringBitmap::deserialize_from(&head[..])?; let mut head = RoaringBitmap::deserialize_from(&head[..])?;
@ -68,7 +70,7 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result
Ok(vec) Ok(vec)
} }
pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> { pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap(); let (head, tail) = values.split_first().unwrap();
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?; let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;

View File

@ -3,11 +3,11 @@ use std::collections::HashSet;
use std::fs::File; use std::fs::File;
use std::io::{self, Seek, SeekFrom, BufReader, BufRead}; use std::io::{self, Seek, SeekFrom, BufReader, BufRead};
use std::num::{NonZeroU32, NonZeroUsize}; use std::num::{NonZeroU32, NonZeroUsize};
use std::result::Result as StdResult;
use std::str; use std::str;
use std::sync::mpsc::sync_channel; use std::sync::mpsc::sync_channel;
use std::time::Instant; use std::time::Instant;
use anyhow::Context;
use bstr::ByteSlice as _; use bstr::ByteSlice as _;
use chrono::Utc; use chrono::Utc;
use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType}; use grenad::{MergerIter, Writer, Sorter, Merger, Reader, FileFuse, CompressionType};
@ -18,7 +18,8 @@ use rayon::prelude::*;
use rayon::ThreadPool; use rayon::ThreadPool;
use serde::{Serialize, Deserialize}; use serde::{Serialize, Deserialize};
use crate::index::Index; use crate::error::{Error, InternalError};
use crate::{Index, Result};
use crate::update::{ use crate::update::{
Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep, Facets, WordsLevelPositions, WordPrefixDocids, WordsPrefixesFst, UpdateIndexingStep,
WordPrefixPairProximityDocids, WordPrefixPairProximityDocids,
@ -56,14 +57,14 @@ pub fn create_writer(typ: CompressionType, level: Option<u32>, file: File) -> io
builder.build(file) builder.build(file)
} }
pub fn create_sorter( pub fn create_sorter<E>(
merge: MergeFn, merge: MergeFn<E>,
chunk_compression_type: CompressionType, chunk_compression_type: CompressionType,
chunk_compression_level: Option<u32>, chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>, chunk_fusing_shrink_size: Option<u64>,
max_nb_chunks: Option<usize>, max_nb_chunks: Option<usize>,
max_memory: Option<usize>, max_memory: Option<usize>,
) -> Sorter<MergeFn> ) -> Sorter<MergeFn<E>>
{ {
let mut builder = Sorter::builder(merge); let mut builder = Sorter::builder(merge);
if let Some(shrink_size) = chunk_fusing_shrink_size { if let Some(shrink_size) = chunk_fusing_shrink_size {
@ -82,7 +83,7 @@ pub fn create_sorter(
builder.build() builder.build()
} }
pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> anyhow::Result<Reader<FileFuse>> { pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> Result<Reader<FileFuse>> {
let mut file = writer.into_inner()?; let mut file = writer.into_inner()?;
file.seek(SeekFrom::Start(0))?; file.seek(SeekFrom::Start(0))?;
let file = if let Some(shrink_size) = shrink_size { let file = if let Some(shrink_size) = shrink_size {
@ -93,19 +94,25 @@ pub fn writer_into_reader(writer: Writer<File>, shrink_size: Option<u64>) -> any
Reader::new(file).map_err(Into::into) Reader::new(file).map_err(Into::into)
} }
pub fn merge_readers(sources: Vec<Reader<FileFuse>>, merge: MergeFn) -> Merger<FileFuse, MergeFn> { pub fn merge_readers<E>(
sources: Vec<Reader<FileFuse>>,
merge: MergeFn<E>,
) -> Merger<FileFuse, MergeFn<E>>
{
let mut builder = Merger::builder(merge); let mut builder = Merger::builder(merge);
builder.extend(sources); builder.extend(sources);
builder.build() builder.build()
} }
pub fn merge_into_lmdb_database( pub fn merge_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase, database: heed::PolyDatabase,
sources: Vec<Reader<FileFuse>>, sources: Vec<Reader<FileFuse>>,
merge: MergeFn, merge: MergeFn<E>,
method: WriteMethod, method: WriteMethod,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
debug!("Merging {} MTBL stores...", sources.len()); debug!("Merging {} MTBL stores...", sources.len());
let before = Instant::now(); let before = Instant::now();
@ -123,13 +130,15 @@ pub fn merge_into_lmdb_database(
Ok(()) Ok(())
} }
pub fn write_into_lmdb_database( pub fn write_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase, database: heed::PolyDatabase,
mut reader: Reader<FileFuse>, mut reader: Reader<FileFuse>,
merge: MergeFn, merge: MergeFn<E>,
method: WriteMethod, method: WriteMethod,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
debug!("Writing MTBL stores..."); debug!("Writing MTBL stores...");
let before = Instant::now(); let before = Instant::now();
@ -138,9 +147,7 @@ pub fn write_into_lmdb_database(
WriteMethod::Append => { WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = reader.next()? { while let Some((k, v)) = reader.next()? {
out_iter.append(k, v).with_context(|| { out_iter.append(k, v)?;
format!("writing {:?} into LMDB", k.as_bstr())
})?;
} }
}, },
WriteMethod::GetMergePut => { WriteMethod::GetMergePut => {
@ -165,13 +172,16 @@ pub fn write_into_lmdb_database(
Ok(()) Ok(())
} }
pub fn sorter_into_lmdb_database( pub fn sorter_into_lmdb_database<E>(
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase, database: heed::PolyDatabase,
sorter: Sorter<MergeFn>, sorter: Sorter<MergeFn<E>>,
merge: MergeFn, merge: MergeFn<E>,
method: WriteMethod, method: WriteMethod,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
Error: From<grenad::Error<E>>
{ {
debug!("Writing MTBL sorter..."); debug!("Writing MTBL sorter...");
let before = Instant::now(); let before = Instant::now();
@ -188,21 +198,21 @@ pub fn sorter_into_lmdb_database(
Ok(()) Ok(())
} }
fn merger_iter_into_lmdb_database<R: io::Read>( fn merger_iter_into_lmdb_database<R: io::Read, E>(
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase, database: heed::PolyDatabase,
mut sorter: MergerIter<R, MergeFn>, mut sorter: MergerIter<R, MergeFn<E>>,
merge: MergeFn, merge: MergeFn<E>,
method: WriteMethod, method: WriteMethod,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
match method { match method {
WriteMethod::Append => { WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = sorter.next()? { while let Some((k, v)) = sorter.next()? {
out_iter.append(k, v).with_context(|| { out_iter.append(k, v)?;
format!("writing {:?} into LMDB", k.as_bstr())
})?;
} }
}, },
WriteMethod::GetMergePut => { WriteMethod::GetMergePut => {
@ -211,7 +221,10 @@ fn merger_iter_into_lmdb_database<R: io::Read>(
match iter.next().transpose()? { match iter.next().transpose()? {
Some((key, old_val)) if key == k => { Some((key, old_val)) if key == k => {
let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)]; let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
let val = merge(k, &vals).expect("merge failed"); let val = merge(k, &vals).map_err(|_| {
// TODO just wrap this error?
InternalError::IndexingMergingKeys { process: "get-put-merge" }
})?;
iter.put_current(k, &val)?; iter.put_current(k, &val)?;
}, },
_ => { _ => {
@ -318,7 +331,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
self.autogenerate_docids = false; self.autogenerate_docids = false;
} }
pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<DocumentAdditionResult> pub fn execute<R, F>(self, reader: R, progress_callback: F) -> Result<DocumentAdditionResult>
where where
R: io::Read, R: io::Read,
F: Fn(UpdateIndexingStep, u64) + Sync, F: Fn(UpdateIndexingStep, u64) + Sync,
@ -365,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
Ok(DocumentAdditionResult { nb_documents }) Ok(DocumentAdditionResult { nb_documents })
} }
pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> anyhow::Result<()> pub fn execute_raw<F>(self, output: TransformOutput, progress_callback: F) -> Result<()>
where where
F: Fn(UpdateIndexingStep) + Sync F: Fn(UpdateIndexingStep) + Sync
{ {
@ -403,15 +416,12 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("{} documents actually deleted", deleted_documents_count); debug!("{} documents actually deleted", deleted_documents_count);
} }
let mmap; if documents_count == 0 {
let bytes = if documents_count == 0 { return Ok(());
&[][..] }
} else {
mmap = unsafe { Mmap::map(&documents_file).context("mmaping the transform documents file")? };
&mmap
};
let documents = grenad::Reader::new(bytes).unwrap(); let bytes = unsafe { Mmap::map(&documents_file)? };
let documents = grenad::Reader::new(bytes.as_bytes()).unwrap();
// The enum which indicates the type of the readers // The enum which indicates the type of the readers
// merges that are potentially done on different threads. // merges that are potentially done on different threads.
@ -477,7 +487,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
&progress_callback, &progress_callback,
) )
}) })
.collect::<Result<Vec<_>, _>>()?; .collect::<StdResult<Vec<_>, _>>()?;
let mut main_readers = Vec::with_capacity(readers.len()); let mut main_readers = Vec::with_capacity(readers.len());
let mut word_docids_readers = Vec::with_capacity(readers.len()); let mut word_docids_readers = Vec::with_capacity(readers.len());
@ -535,7 +545,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
debug!("Merging the main, word docids and words pairs proximity docids in parallel..."); debug!("Merging the main, word docids and words pairs proximity docids in parallel...");
rayon::spawn(move || { rayon::spawn(move || {
vec![ vec![
(DatabaseType::Main, main_readers, fst_merge as MergeFn), (DatabaseType::Main, main_readers, fst_merge as MergeFn<_>),
(DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge), (DatabaseType::WordDocids, word_docids_readers, roaring_bitmap_merge),
( (
DatabaseType::FacetLevel0NumbersDocids, DatabaseType::FacetLevel0NumbersDocids,
@ -570,7 +580,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
facet_field_strings_docids_readers, facet_field_strings_docids_readers,
field_id_docid_facet_numbers_readers, field_id_docid_facet_numbers_readers,
field_id_docid_facet_strings_readers, field_id_docid_facet_strings_readers,
)) as anyhow::Result<_> )) as Result<_>
})?; })?;
let ( let (

View File

@ -6,7 +6,6 @@ use std::iter::FromIterator;
use std::time::Instant; use std::time::Instant;
use std::{cmp, iter}; use std::{cmp, iter};
use anyhow::Context;
use bstr::ByteSlice as _; use bstr::ByteSlice as _;
use fst::Set; use fst::Set;
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType}; use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
@ -19,11 +18,12 @@ use roaring::RoaringBitmap;
use serde_json::Value; use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
use crate::error::{Error, InternalError, SerializationError};
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
use crate::update::UpdateIndexingStep; use crate::update::UpdateIndexingStep;
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId}; use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, Result};
use super::{MergeFn, create_writer, create_sorter, writer_into_reader}; use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, cbo_roaring_bitmap_merge}; use super::merge_function::{fst_merge, keep_first, roaring_bitmap_merge, cbo_roaring_bitmap_merge};
@ -66,15 +66,15 @@ pub struct Store<'s, A> {
chunk_compression_level: Option<u32>, chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>, chunk_fusing_shrink_size: Option<u64>,
// MTBL sorters // MTBL sorters
main_sorter: Sorter<MergeFn>, main_sorter: Sorter<MergeFn<Error>>,
word_docids_sorter: Sorter<MergeFn>, word_docids_sorter: Sorter<MergeFn<Error>>,
words_pairs_proximities_docids_sorter: Sorter<MergeFn>, words_pairs_proximities_docids_sorter: Sorter<MergeFn<Error>>,
word_level_position_docids_sorter: Sorter<MergeFn>, word_level_position_docids_sorter: Sorter<MergeFn<Error>>,
field_id_word_count_docids_sorter: Sorter<MergeFn>, field_id_word_count_docids_sorter: Sorter<MergeFn<Error>>,
facet_field_numbers_docids_sorter: Sorter<MergeFn>, facet_field_numbers_docids_sorter: Sorter<MergeFn<Error>>,
facet_field_strings_docids_sorter: Sorter<MergeFn>, facet_field_strings_docids_sorter: Sorter<MergeFn<Error>>,
field_id_docid_facet_numbers_sorter: Sorter<MergeFn>, field_id_docid_facet_numbers_sorter: Sorter<MergeFn<Error>>,
field_id_docid_facet_strings_sorter: Sorter<MergeFn>, field_id_docid_facet_strings_sorter: Sorter<MergeFn<Error>>,
// MTBL writers // MTBL writers
docid_word_positions_writer: Writer<File>, docid_word_positions_writer: Writer<File>,
documents_writer: Writer<File>, documents_writer: Writer<File>,
@ -93,7 +93,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
chunk_compression_level: Option<u32>, chunk_compression_level: Option<u32>,
chunk_fusing_shrink_size: Option<u64>, chunk_fusing_shrink_size: Option<u64>,
stop_words: Option<&'s Set<A>>, stop_words: Option<&'s Set<A>>,
) -> anyhow::Result<Self> ) -> Result<Self>
{ {
// We divide the max memory by the number of sorter the Store have. // We divide the max memory by the number of sorter the Store have.
let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5)); let max_memory = max_memory.map(|mm| cmp::max(ONE_KILOBYTE, mm / 5));
@ -221,7 +221,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
} }
// Save the documents ids under the position and word we have seen it. // Save the documents ids under the position and word we have seen it.
fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> anyhow::Result<()> { fn insert_word_docid(&mut self, word: &str, id: DocumentId) -> Result<()> {
// if get_refresh finds the element it is assured to be at the end of the linked hash map. // if get_refresh finds the element it is assured to be at the end of the linked hash map.
match self.word_docids.get_refresh(word.as_bytes()) { match self.word_docids.get_refresh(word.as_bytes()) {
Some(old) => { old.insert(id); }, Some(old) => { old.insert(id); },
@ -246,7 +246,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
field_id: FieldId, field_id: FieldId,
value: OrderedFloat<f64>, value: OrderedFloat<f64>,
id: DocumentId, id: DocumentId,
) -> anyhow::Result<()> ) -> Result<()>
{ {
let sorter = &mut self.field_id_docid_facet_numbers_sorter; let sorter = &mut self.field_id_docid_facet_numbers_sorter;
Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?; Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?;
@ -279,7 +279,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
field_id: FieldId, field_id: FieldId,
value: String, value: String,
id: DocumentId, id: DocumentId,
) -> anyhow::Result<()> ) -> Result<()>
{ {
let sorter = &mut self.field_id_docid_facet_strings_sorter; let sorter = &mut self.field_id_docid_facet_strings_sorter;
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?; Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
@ -311,7 +311,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
&mut self, &mut self,
words_pairs_proximities: impl IntoIterator<Item=((&'a str, &'a str), u8)>, words_pairs_proximities: impl IntoIterator<Item=((&'a str, &'a str), u8)>,
id: DocumentId, id: DocumentId,
) -> anyhow::Result<()> ) -> Result<()>
{ {
for ((w1, w2), prox) in words_pairs_proximities { for ((w1, w2), prox) in words_pairs_proximities {
let w1 = SmallVec32::from(w1.as_bytes()); let w1 = SmallVec32::from(w1.as_bytes());
@ -350,7 +350,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>, facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
facet_strings_values: &mut HashMap<FieldId, Vec<String>>, facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
record: &[u8], record: &[u8],
) -> anyhow::Result<()> ) -> Result<()>
{ {
// We compute the list of words pairs proximities (self-join) and write it directly to disk. // We compute the list of words pairs proximities (self-join) and write it directly to disk.
let words_pair_proximities = compute_words_pair_proximities(&words_positions); let words_pair_proximities = compute_words_pair_proximities(&words_positions);
@ -385,10 +385,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_words_pairs_proximities( fn write_words_pairs_proximities<E>(
sorter: &mut Sorter<MergeFn>, sorter: &mut Sorter<MergeFn<E>>,
iter: impl IntoIterator<Item=((SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap)>, iter: impl IntoIterator<Item=((SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap)>,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
let mut key = Vec::new(); let mut key = Vec::new();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@ -417,7 +419,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
writer: &mut Writer<File>, writer: &mut Writer<File>,
id: DocumentId, id: DocumentId,
words_positions: &HashMap<String, SmallVec32<Position>>, words_positions: &HashMap<String, SmallVec32<Position>>,
) -> anyhow::Result<()> ) -> Result<()>
{ {
// We prefix the words by the document id. // We prefix the words by the document id.
let mut key = id.to_be_bytes().to_vec(); let mut key = id.to_be_bytes().to_vec();
@ -445,11 +447,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_word_position_docids( fn write_word_position_docids<E>(
writer: &mut Sorter<MergeFn>, writer: &mut Sorter<MergeFn<E>>,
document_id: DocumentId, document_id: DocumentId,
words_positions: &HashMap<String, SmallVec32<Position>>, words_positions: &HashMap<String, SmallVec32<Position>>,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();
let mut data_buffer = Vec::new(); let mut data_buffer = Vec::new();
@ -480,11 +484,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_facet_field_string_docids<I>( fn write_facet_field_string_docids<I, E>(
sorter: &mut Sorter<MergeFn>, sorter: &mut Sorter<MergeFn<E>>,
iter: I, iter: I,
) -> anyhow::Result<()> ) -> Result<()>
where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)> where
I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>,
Error: From<E>,
{ {
let mut key_buffer = Vec::new(); let mut key_buffer = Vec::new();
let mut data_buffer = Vec::new(); let mut data_buffer = Vec::new();
@ -504,11 +510,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_facet_field_number_docids<I>( fn write_facet_field_number_docids<I, E>(
sorter: &mut Sorter<MergeFn>, sorter: &mut Sorter<MergeFn<E>>,
iter: I, iter: I,
) -> anyhow::Result<()> ) -> Result<()>
where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)> where
I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>,
Error: From<E>,
{ {
let mut data_buffer = Vec::new(); let mut data_buffer = Vec::new();
@ -517,7 +525,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value)) let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
.map(Cow::into_owned) .map(Cow::into_owned)
.context("could not serialize facet level value key")?; .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
@ -529,16 +537,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_field_id_docid_facet_number_value( fn write_field_id_docid_facet_number_value<E>(
sorter: &mut Sorter<MergeFn>, sorter: &mut Sorter<MergeFn<E>>,
field_id: FieldId, field_id: FieldId,
document_id: DocumentId, document_id: DocumentId,
value: OrderedFloat<f64>, value: OrderedFloat<f64>,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value)) let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
.map(Cow::into_owned) .map(Cow::into_owned)
.context("could not serialize facet level value key")?; .ok_or(SerializationError::Encoding { db_name: Some("facet level value") })?;
if lmdb_key_valid_size(&key) { if lmdb_key_valid_size(&key) {
sorter.insert(&key, &[])?; sorter.insert(&key, &[])?;
@ -547,12 +557,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_field_id_docid_facet_string_value( fn write_field_id_docid_facet_string_value<E>(
sorter: &mut Sorter<MergeFn>, sorter: &mut Sorter<MergeFn<E>>,
field_id: FieldId, field_id: FieldId,
document_id: DocumentId, document_id: DocumentId,
value: &str, value: &str,
) -> anyhow::Result<()> ) -> Result<()>
where
Error: From<E>,
{ {
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@ -565,8 +577,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(()) Ok(())
} }
fn write_word_docids<I>(sorter: &mut Sorter<MergeFn>, iter: I) -> anyhow::Result<()> fn write_word_docids<I, E>(sorter: &mut Sorter<MergeFn<E>>, iter: I) -> Result<()>
where I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)> where
I: IntoIterator<Item=(SmallVec32<u8>, RoaringBitmap)>,
Error: From<E>,
{ {
let mut key = Vec::new(); let mut key = Vec::new();
let mut buffer = Vec::new(); let mut buffer = Vec::new();
@ -596,7 +610,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
num_threads: usize, num_threads: usize,
log_every_n: Option<usize>, log_every_n: Option<usize>,
mut progress_callback: F, mut progress_callback: F,
) -> anyhow::Result<Readers> ) -> Result<Readers>
where F: FnMut(UpdateIndexingStep), where F: FnMut(UpdateIndexingStep),
{ {
debug!("{:?}: Indexing in a Store...", thread_index); debug!("{:?}: Indexing in a Store...", thread_index);
@ -625,7 +639,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
for (attr, content) in document.iter() { for (attr, content) in document.iter() {
if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) { if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) {
let value = serde_json::from_slice(content)?; let value = serde_json::from_slice(content).map_err(InternalError::SerdeJson)?;
let (facet_numbers, facet_strings) = extract_facet_values(&value); let (facet_numbers, facet_strings) = extract_facet_values(&value);
facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers); facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers);
@ -679,7 +693,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
Ok(readers) Ok(readers)
} }
fn finish(mut self) -> anyhow::Result<Readers> { fn finish(mut self) -> Result<Readers> {
let comp_type = self.chunk_compression_type; let comp_type = self.chunk_compression_type;
let comp_level = self.chunk_compression_level; let comp_level = self.chunk_compression_level;
let shrink_size = self.chunk_fusing_shrink_size; let shrink_size = self.chunk_fusing_shrink_size;

View File

@ -2,17 +2,19 @@ use std::borrow::Cow;
use std::fs::File; use std::fs::File;
use std::io::{Read, Seek, SeekFrom}; use std::io::{Read, Seek, SeekFrom};
use std::iter::Peekable; use std::iter::Peekable;
use std::result::Result as StdResult;
use std::time::Instant; use std::time::Instant;
use anyhow::{anyhow, Context};
use grenad::CompressionType; use grenad::CompressionType;
use log::info; use log::info;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use crate::error::{Error, UserError, InternalError};
use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv}; use crate::update::index_documents::merge_function::{merge_obkvs, keep_latest_obkv};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::{Index, BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution}; use crate::{BEU32, MergeFn, FieldsIdsMap, ExternalDocumentsIds, FieldId, FieldsDistribution};
use crate::{Index, Result};
use super::merge_function::merge_two_obkvs; use super::merge_function::merge_two_obkvs;
use super::{create_writer, create_sorter, IndexDocumentsMethod}; use super::{create_writer, create_sorter, IndexDocumentsMethod};
@ -53,7 +55,7 @@ fn is_primary_key(field: impl AsRef<str>) -> bool {
} }
impl Transform<'_, '_> { impl Transform<'_, '_> {
pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput> pub fn output_from_json<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where where
R: Read, R: Read,
F: Fn(UpdateIndexingStep) + Sync, F: Fn(UpdateIndexingStep) + Sync,
@ -61,7 +63,7 @@ impl Transform<'_, '_> {
self.output_from_generic_json(reader, false, progress_callback) self.output_from_generic_json(reader, false, progress_callback)
} }
pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput> pub fn output_from_json_stream<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where where
R: Read, R: Read,
F: Fn(UpdateIndexingStep) + Sync, F: Fn(UpdateIndexingStep) + Sync,
@ -74,7 +76,7 @@ impl Transform<'_, '_> {
reader: R, reader: R,
is_stream: bool, is_stream: bool,
progress_callback: F, progress_callback: F,
) -> anyhow::Result<TransformOutput> ) -> Result<TransformOutput>
where where
R: Read, R: Read,
F: Fn(UpdateIndexingStep) + Sync, F: Fn(UpdateIndexingStep) + Sync,
@ -88,7 +90,7 @@ impl Transform<'_, '_> {
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>; let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
iter.peekable() iter.peekable()
} else { } else {
let vec: Vec<_> = serde_json::from_reader(reader)?; let vec: Vec<_> = serde_json::from_reader(reader).map_err(UserError::SerdeJson)?;
let iter = vec.into_iter().map(Ok); let iter = vec.into_iter().map(Ok);
let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>; let iter = Box::new(iter) as Box<dyn Iterator<Item=_>>;
iter.peekable() iter.peekable()
@ -96,9 +98,12 @@ impl Transform<'_, '_> {
// We extract the primary key from the first document in // We extract the primary key from the first document in
// the batch if it hasn't already been defined in the index // the batch if it hasn't already been defined in the index
let first = match documents.peek().map(Result::as_ref).transpose() { let first = match documents.peek().map(StdResult::as_ref).transpose() {
Ok(first) => first, Ok(first) => first,
Err(_) => return Err(documents.next().unwrap().unwrap_err().into()), Err(_) => {
let error = documents.next().unwrap().unwrap_err();
return Err(UserError::SerdeJson(error).into());
},
}; };
let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned()); let alternative_name = first.and_then(|doc| doc.keys().find(|f| is_primary_key(f)).cloned());
@ -145,7 +150,7 @@ impl Transform<'_, '_> {
let mut documents_count = 0; let mut documents_count = 0;
for result in documents { for result in documents {
let document = result?; let document = result.map_err(UserError::SerdeJson)?;
if self.log_every_n.map_or(false, |len| documents_count % len == 0) { if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat { progress_callback(UpdateIndexingStep::TransformFromUserIntoGenericFormat {
@ -158,7 +163,7 @@ impl Transform<'_, '_> {
// We prepare the fields ids map with the documents keys. // We prepare the fields ids map with the documents keys.
for (key, _value) in &document { for (key, _value) in &document {
fields_ids_map.insert(&key).context("field id limit reached")?; fields_ids_map.insert(&key).ok_or(UserError::AttributeLimitReached)?;
} }
// We retrieve the user id from the document based on the primary key name, // We retrieve the user id from the document based on the primary key name,
@ -167,11 +172,13 @@ impl Transform<'_, '_> {
Some(value) => match value { Some(value) => match value {
Value::String(string) => Cow::Borrowed(string.as_str()), Value::String(string) => Cow::Borrowed(string.as_str()),
Value::Number(number) => Cow::Owned(number.to_string()), Value::Number(number) => Cow::Owned(number.to_string()),
_ => return Err(anyhow!("documents ids must be either strings or numbers")), content => return Err(UserError::InvalidDocumentId {
document_id: content.clone(),
}.into()),
}, },
None => { None => {
if !self.autogenerate_docids { if !self.autogenerate_docids {
return Err(anyhow!("missing primary key")); return Err(UserError::MissingPrimaryKey.into());
} }
let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer);
Cow::Borrowed(uuid) Cow::Borrowed(uuid)
@ -186,13 +193,15 @@ impl Transform<'_, '_> {
// and this should be the document id we return the one we generated. // and this should be the document id we return the one we generated.
if let Some(value) = document.get(name) { if let Some(value) = document.get(name) {
// We serialize the attribute values. // We serialize the attribute values.
serde_json::to_writer(&mut json_buffer, value)?; serde_json::to_writer(&mut json_buffer, value).map_err(InternalError::SerdeJson)?;
writer.insert(field_id, &json_buffer)?; writer.insert(field_id, &json_buffer)?;
} }
// We validate the document id [a-zA-Z0-9\-_]. // We validate the document id [a-zA-Z0-9\-_].
if field_id == primary_key_id && validate_document_id(&external_id).is_none() { if field_id == primary_key_id && validate_document_id(&external_id).is_none() {
return Err(anyhow!("invalid document id: {:?}", external_id)); return Err(UserError::InvalidDocumentId {
document_id: Value::from(external_id),
}.into());
} }
} }
@ -217,7 +226,7 @@ impl Transform<'_, '_> {
) )
} }
pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<TransformOutput> pub fn output_from_csv<R, F>(self, reader: R, progress_callback: F) -> Result<TransformOutput>
where where
R: Read, R: Read,
F: Fn(UpdateIndexingStep) + Sync, F: Fn(UpdateIndexingStep) + Sync,
@ -226,12 +235,12 @@ impl Transform<'_, '_> {
let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap(); let external_documents_ids = self.index.external_documents_ids(self.rtxn).unwrap();
let mut csv = csv::Reader::from_reader(reader); let mut csv = csv::Reader::from_reader(reader);
let headers = csv.headers()?; let headers = csv.headers().map_err(UserError::Csv)?;
let mut fields_ids = Vec::new(); let mut fields_ids = Vec::new();
// Generate the new fields ids based on the current fields ids and this CSV headers. // Generate the new fields ids based on the current fields ids and this CSV headers.
for (i, header) in headers.iter().enumerate() { for (i, header) in headers.iter().enumerate() {
let id = fields_ids_map.insert(header).context("field id limit reached)")?; let id = fields_ids_map.insert(header).ok_or(UserError::AttributeLimitReached)?;
fields_ids.push((id, i)); fields_ids.push((id, i));
} }
@ -281,7 +290,7 @@ impl Transform<'_, '_> {
let mut documents_count = 0; let mut documents_count = 0;
let mut record = csv::StringRecord::new(); let mut record = csv::StringRecord::new();
while csv.read_record(&mut record)? { while csv.read_record(&mut record).map_err(UserError::Csv)? {
obkv_buffer.clear(); obkv_buffer.clear();
let mut writer = obkv::KvWriter::new(&mut obkv_buffer); let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
@ -298,7 +307,9 @@ impl Transform<'_, '_> {
// We validate the document id [a-zA-Z0-9\-_]. // We validate the document id [a-zA-Z0-9\-_].
match validate_document_id(&external_id) { match validate_document_id(&external_id) {
Some(valid) => valid, Some(valid) => valid,
None => return Err(anyhow!("invalid document id: {:?}", external_id)), None => return Err(UserError::InvalidDocumentId {
document_id: Value::from(external_id),
}.into()),
} }
}, },
None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer), None => uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer),
@ -316,7 +327,7 @@ impl Transform<'_, '_> {
for (field_id, field) in iter { for (field_id, field) in iter {
// We serialize the attribute values as JSON strings. // We serialize the attribute values as JSON strings.
json_buffer.clear(); json_buffer.clear();
serde_json::to_writer(&mut json_buffer, &field)?; serde_json::to_writer(&mut json_buffer, &field).map_err(InternalError::SerdeJson)?;
writer.insert(*field_id, &json_buffer)?; writer.insert(*field_id, &json_buffer)?;
} }
@ -344,17 +355,18 @@ impl Transform<'_, '_> {
/// Generate the `TransformOutput` based on the given sorter that can be generated from any /// Generate the `TransformOutput` based on the given sorter that can be generated from any
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
/// id for the user side and the value must be an obkv where keys are valid fields ids. /// id for the user side and the value must be an obkv where keys are valid fields ids.
fn output_from_sorter<F>( fn output_from_sorter<F, E>(
self, self,
sorter: grenad::Sorter<MergeFn>, sorter: grenad::Sorter<MergeFn<E>>,
primary_key: String, primary_key: String,
fields_ids_map: FieldsIdsMap, fields_ids_map: FieldsIdsMap,
approximate_number_of_documents: usize, approximate_number_of_documents: usize,
mut external_documents_ids: ExternalDocumentsIds<'_>, mut external_documents_ids: ExternalDocumentsIds<'_>,
progress_callback: F, progress_callback: F,
) -> anyhow::Result<TransformOutput> ) -> Result<TransformOutput>
where where
F: Fn(UpdateIndexingStep) + Sync, F: Fn(UpdateIndexingStep) + Sync,
Error: From<E>,
{ {
let documents_ids = self.index.documents_ids(self.rtxn)?; let documents_ids = self.index.documents_ids(self.rtxn)?;
let mut fields_distribution = self.index.fields_distribution(self.rtxn)?; let mut fields_distribution = self.index.fields_distribution(self.rtxn)?;
@ -362,7 +374,7 @@ impl Transform<'_, '_> {
// Once we have sort and deduplicated the documents we write them into a final file. // Once we have sort and deduplicated the documents we write them into a final file.
let mut final_sorter = create_sorter( let mut final_sorter = create_sorter(
|_docid, _obkvs| Err(anyhow!("cannot merge two documents")), |_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }),
self.chunk_compression_type, self.chunk_compression_type,
self.chunk_compression_level, self.chunk_compression_level,
self.chunk_fusing_shrink_size, self.chunk_fusing_shrink_size,
@ -398,7 +410,10 @@ impl Transform<'_, '_> {
IndexDocumentsMethod::UpdateDocuments => { IndexDocumentsMethod::UpdateDocuments => {
let key = BEU32::new(docid); let key = BEU32::new(docid);
let base_obkv = self.index.documents.get(&self.rtxn, &key)? let base_obkv = self.index.documents.get(&self.rtxn, &key)?
.context("document not found")?; .ok_or(InternalError::DatabaseMissingEntry {
db_name: "documents",
key: None,
})?;
let update_obkv = obkv::KvReader::new(update_obkv); let update_obkv = obkv::KvReader::new(update_obkv);
merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer); merge_two_obkvs(base_obkv, update_obkv, &mut obkv_buffer);
(docid, obkv_buffer.as_slice()) (docid, obkv_buffer.as_slice())
@ -409,7 +424,7 @@ impl Transform<'_, '_> {
// If this user id is new we add it to the external documents ids map // If this user id is new we add it to the external documents ids map
// for new ids and into the list of new documents. // for new ids and into the list of new documents.
let new_docid = available_documents_ids.next() let new_docid = available_documents_ids.next()
.context("no more available documents ids")?; .ok_or(UserError::DocumentLimitReached)?;
new_external_documents_ids_builder.insert(external_id, new_docid as u64)?; new_external_documents_ids_builder.insert(external_id, new_docid as u64)?;
new_documents_ids.insert(new_docid); new_documents_ids.insert(new_docid);
(new_docid, update_obkv) (new_docid, update_obkv)
@ -469,7 +484,7 @@ impl Transform<'_, '_> {
primary_key: String, primary_key: String,
old_fields_ids_map: FieldsIdsMap, old_fields_ids_map: FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap, new_fields_ids_map: FieldsIdsMap,
) -> anyhow::Result<TransformOutput> ) -> Result<TransformOutput>
{ {
let fields_distribution = self.index.fields_distribution(self.rtxn)?; let fields_distribution = self.index.fields_distribution(self.rtxn)?;
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?; let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
@ -529,10 +544,10 @@ fn compute_primary_key_pair(
fields_ids_map: &mut FieldsIdsMap, fields_ids_map: &mut FieldsIdsMap,
alternative_name: Option<String>, alternative_name: Option<String>,
autogenerate_docids: bool, autogenerate_docids: bool,
) -> anyhow::Result<(FieldId, String)> { ) -> Result<(FieldId, String)> {
match primary_key { match primary_key {
Some(primary_key) => { Some(primary_key) => {
let id = fields_ids_map.insert(primary_key).ok_or(anyhow!("Maximum number of fields exceeded"))?; let id = fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
Ok((id, primary_key.to_string())) Ok((id, primary_key.to_string()))
} }
None => { None => {
@ -542,12 +557,12 @@ fn compute_primary_key_pair(
if !autogenerate_docids { if !autogenerate_docids {
// If there is no primary key in the current document batch, we must // If there is no primary key in the current document batch, we must
// return an error and not automatically generate any document id. // return an error and not automatically generate any document id.
anyhow::bail!("missing primary key") return Err(UserError::MissingPrimaryKey.into());
} }
DEFAULT_PRIMARY_KEY_NAME.to_string() DEFAULT_PRIMARY_KEY_NAME.to_string()
}, },
}; };
let id = fields_ids_map.insert(&name).context("field id limit reached")?; let id = fields_ids_map.insert(&name).ok_or(UserError::AttributeLimitReached)?;
Ok((id, name)) Ok((id, name))
}, },
} }

View File

@ -1,6 +1,6 @@
use std::collections::{BTreeSet, HashMap, HashSet}; use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;
use anyhow::Context;
use chrono::Utc; use chrono::Utc;
use grenad::CompressionType; use grenad::CompressionType;
use itertools::Itertools; use itertools::Itertools;
@ -9,9 +9,10 @@ use rayon::ThreadPool;
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::criterion::Criterion; use crate::criterion::Criterion;
use crate::error::UserError;
use crate::update::index_documents::{IndexDocumentsMethod, Transform}; use crate::update::index_documents::{IndexDocumentsMethod, Transform};
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
use crate::{FieldsIdsMap, Index}; use crate::{FieldsIdsMap, Index, Result};
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> { pub enum Setting<T> {
@ -33,7 +34,7 @@ impl<T> Setting<T> {
} }
impl<T: Serialize> Serialize for Setting<T> { impl<T: Serialize> Serialize for Setting<T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: Serializer { fn serialize<S>(&self, serializer: S) -> StdResult<S::Ok, S::Error> where S: Serializer {
match self { match self {
Self::Set(value) => Some(value), Self::Set(value) => Some(value),
// Usually not_set isn't serialized by setting skip_serializing_if field attribute // Usually not_set isn't serialized by setting skip_serializing_if field attribute
@ -43,7 +44,7 @@ impl<T: Serialize> Serialize for Setting<T> {
} }
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> { impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: Deserializer<'de> { fn deserialize<D>(deserializer: D) -> StdResult<Self, D::Error> where D: Deserializer<'de> {
Deserialize::deserialize(deserializer).map(|x| match x { Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x), Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value None => Self::Reset, // Reset is forced by sending null value
@ -165,7 +166,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
} }
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
where where
F: Fn(UpdateIndexingStep, u64) + Sync F: Fn(UpdateIndexingStep, u64) + Sync
{ {
@ -192,7 +193,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}; };
// There already has been a document addition, the primary key should be set by now. // There already has been a document addition, the primary key should be set by now.
let primary_key = self.index.primary_key(&self.wtxn)?.context("Index must have a primary key")?; let primary_key = self.index.primary_key(&self.wtxn)?.ok_or(UserError::MissingPrimaryKey)?;
// We remap the documents fields based on the new `FieldsIdsMap`. // We remap the documents fields based on the new `FieldsIdsMap`.
let output = transform.remap_index_documents( let output = transform.remap_index_documents(
@ -220,7 +221,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(()) Ok(())
} }
fn update_displayed(&mut self) -> anyhow::Result<bool> { fn update_displayed(&mut self) -> Result<bool> {
match self.displayed_fields { match self.displayed_fields {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@ -234,7 +235,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in names.iter() { for name in names.iter() {
fields_ids_map fields_ids_map
.insert(name) .insert(name)
.context("field id limit exceeded")?; .ok_or(UserError::AttributeLimitReached)?;
} }
self.index.put_displayed_fields(self.wtxn, &names)?; self.index.put_displayed_fields(self.wtxn, &names)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
@ -245,13 +246,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(true) Ok(true)
} }
fn update_distinct_field(&mut self) -> anyhow::Result<bool> { fn update_distinct_field(&mut self) -> Result<bool> {
match self.distinct_field { match self.distinct_field {
Setting::Set(ref attr) => { Setting::Set(ref attr) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
fields_ids_map fields_ids_map
.insert(attr) .insert(attr)
.context("field id limit exceeded")?; .ok_or(UserError::AttributeLimitReached)?;
self.index.put_distinct_field(self.wtxn, &attr)?; self.index.put_distinct_field(self.wtxn, &attr)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
@ -264,7 +265,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
/// Updates the index's searchable attributes. This causes the field map to be recomputed to /// Updates the index's searchable attributes. This causes the field map to be recomputed to
/// reflect the order of the searchable attributes. /// reflect the order of the searchable attributes.
fn update_searchable(&mut self) -> anyhow::Result<bool> { fn update_searchable(&mut self) -> Result<bool> {
match self.searchable_fields { match self.searchable_fields {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
// every time the searchable attributes are updated, we need to update the // every time the searchable attributes are updated, we need to update the
@ -285,13 +286,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in names.iter() { for name in names.iter() {
new_fields_ids_map new_fields_ids_map
.insert(&name) .insert(&name)
.context("field id limit exceeded")?; .ok_or(UserError::AttributeLimitReached)?;
} }
for (_, name) in old_fields_ids_map.iter() { for (_, name) in old_fields_ids_map.iter() {
new_fields_ids_map new_fields_ids_map
.insert(&name) .insert(&name)
.context("field id limit exceeded")?; .ok_or(UserError::AttributeLimitReached)?;
} }
self.index.put_searchable_fields(self.wtxn, &names)?; self.index.put_searchable_fields(self.wtxn, &names)?;
@ -303,7 +304,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(true) Ok(true)
} }
fn update_stop_words(&mut self) -> anyhow::Result<bool> { fn update_stop_words(&mut self) -> Result<bool> {
match self.stop_words { match self.stop_words {
Setting::Set(ref stop_words) => { Setting::Set(ref stop_words) => {
let current = self.index.stop_words(self.wtxn)?; let current = self.index.stop_words(self.wtxn)?;
@ -325,7 +326,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
} }
fn update_synonyms(&mut self) -> anyhow::Result<bool> { fn update_synonyms(&mut self) -> Result<bool> {
match self.synonyms { match self.synonyms {
Setting::Set(ref synonyms) => { Setting::Set(ref synonyms) => {
fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> { fn normalize(analyzer: &Analyzer<&[u8]>, text: &str) -> Vec<String> {
@ -383,13 +384,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
} }
fn update_filterable(&mut self) -> anyhow::Result<()> { fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields { match self.filterable_fields {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_facets = HashSet::new(); let mut new_facets = HashSet::new();
for name in fields { for name in fields {
fields_ids_map.insert(name).context("field id limit exceeded")?; fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
new_facets.insert(name.clone()); new_facets.insert(name.clone());
} }
self.index.put_filterable_fields(self.wtxn, &new_facets)?; self.index.put_filterable_fields(self.wtxn, &new_facets)?;
@ -401,7 +402,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(()) Ok(())
} }
fn update_criteria(&mut self) -> anyhow::Result<()> { fn update_criteria(&mut self) -> Result<()> {
match self.criteria { match self.criteria {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@ -409,7 +410,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for name in fields { for name in fields {
let criterion: Criterion = name.parse()?; let criterion: Criterion = name.parse()?;
if let Some(name) = criterion.field_name() { if let Some(name) = criterion.field_name() {
fields_ids_map.insert(name).context("field id limit exceeded")?; fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
} }
new_criteria.push(criterion); new_criteria.push(criterion);
} }
@ -422,7 +423,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(()) Ok(())
} }
pub fn execute<F>(mut self, progress_callback: F) -> anyhow::Result<()> pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
where where
F: Fn(UpdateIndexingStep, u64) + Sync F: Fn(UpdateIndexingStep, u64) + Sync
{ {

View File

@ -1,7 +1,7 @@
use grenad::CompressionType; use grenad::CompressionType;
use rayon::ThreadPool; use rayon::ThreadPool;
use crate::Index; use crate::{Index, Result};
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets}; use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
pub struct UpdateBuilder<'a> { pub struct UpdateBuilder<'a> {
@ -76,7 +76,7 @@ impl<'a> UpdateBuilder<'a> {
self, self,
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index, index: &'i Index,
) -> anyhow::Result<DeleteDocuments<'t, 'u, 'i>> ) -> Result<DeleteDocuments<'t, 'u, 'i>>
{ {
DeleteDocuments::new(wtxn, index, self.update_id) DeleteDocuments::new(wtxn, index, self.update_id)
} }

View File

@ -5,6 +5,7 @@ use fst::Streamer;
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::ByteSlice; use heed::types::ByteSlice;
use crate::Result;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database, create_sorter, roaring_bitmap_merge, sorter_into_lmdb_database,
@ -33,7 +34,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
} }
} }
pub fn execute(self) -> anyhow::Result<()> { pub fn execute(self) -> Result<()> {
// Clear the word prefix docids database. // Clear the word prefix docids database.
self.index.word_prefix_docids.clear(self.wtxn)?; self.index.word_prefix_docids.clear(self.wtxn)?;

View File

@ -7,7 +7,7 @@ use heed::BytesEncode;
use heed::types::ByteSlice; use heed::types::ByteSlice;
use log::debug; use log::debug;
use crate::Index; use crate::{Index, Result};
use crate::heed_codec::StrStrU8Codec; use crate::heed_codec::StrStrU8Codec;
use crate::update::index_documents::{ use crate::update::index_documents::{
WriteMethod, create_sorter, sorter_into_lmdb_database, WriteMethod, create_sorter, sorter_into_lmdb_database,
@ -41,7 +41,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
} }
} }
pub fn execute(self) -> anyhow::Result<()> { pub fn execute(self) -> Result<()> {
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk..."); debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?; self.index.word_prefix_pair_proximity_docids.clear(self.wtxn)?;

View File

@ -11,7 +11,9 @@ use heed::{BytesEncode, Error};
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::InternalError;
use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec}; use crate::heed_codec::{StrLevelPositionCodec, CboRoaringBitmapCodec};
use crate::Result;
use crate::update::index_documents::WriteMethod; use crate::update::index_documents::WriteMethod;
use crate::update::index_documents::{ use crate::update::index_documents::{
create_writer, create_sorter, writer_into_reader, write_into_lmdb_database, create_writer, create_sorter, writer_into_reader, write_into_lmdb_database,
@ -56,7 +58,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self self
} }
pub fn execute(self) -> anyhow::Result<()> { pub fn execute(self) -> Result<()> {
debug!("Computing and writing the word levels positions docids into LMDB on disk..."); debug!("Computing and writing the word levels positions docids into LMDB on disk...");
let entries = compute_positions_levels( let entries = compute_positions_levels(
@ -78,7 +80,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self.wtxn, self.wtxn,
*self.index.word_level_position_docids.as_polymorph(), *self.index.word_level_position_docids.as_polymorph(),
entries, entries,
|_, _| anyhow::bail!("invalid word level position merging"), |_, _| Err(InternalError::IndexingMergingKeys { process: "word level position" }),
WriteMethod::Append, WriteMethod::Append,
)?; )?;
@ -142,7 +144,7 @@ impl<'t, 'u, 'i> WordsLevelPositions<'t, 'u, 'i> {
self.wtxn, self.wtxn,
*self.index.word_prefix_level_position_docids.as_polymorph(), *self.index.word_prefix_level_position_docids.as_polymorph(),
entries, entries,
|_, _| anyhow::bail!("invalid word prefix level position merging"), |_, _| Err(InternalError::IndexingMergingKeys { process: "word prefix level position" }),
WriteMethod::Append, WriteMethod::Append,
)?; )?;
@ -174,7 +176,7 @@ fn compute_positions_levels(
shrink_size: Option<u64>, shrink_size: Option<u64>,
level_group_size: NonZeroU32, level_group_size: NonZeroU32,
min_level_size: NonZeroU32, min_level_size: NonZeroU32,
) -> anyhow::Result<Reader<FileFuse>> ) -> Result<Reader<FileFuse>>
{ {
// It is forbidden to keep a cursor and write in a database at the same time with LMDB // It is forbidden to keep a cursor and write in a database at the same time with LMDB
// therefore we write the facet levels entries into a grenad file before transfering them. // therefore we write the facet levels entries into a grenad file before transfering them.
@ -251,7 +253,7 @@ fn write_level_entry(
left: u32, left: u32,
right: u32, right: u32,
ids: &RoaringBitmap, ids: &RoaringBitmap,
) -> anyhow::Result<()> ) -> Result<()>
{ {
let key = (word, level, left, right); let key = (word, level, left, right);
let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?; let key = StrLevelPositionCodec::bytes_encode(&key).ok_or(Error::Encoding)?;

View File

@ -2,7 +2,7 @@ use std::iter::FromIterator;
use std::str; use std::str;
use fst::Streamer; use fst::Streamer;
use crate::{Index, SmallString32}; use crate::{Index, SmallString32, Result};
pub struct WordsPrefixesFst<'t, 'u, 'i> { pub struct WordsPrefixesFst<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>, wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -48,7 +48,7 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
self self
} }
pub fn execute(self) -> anyhow::Result<()> { pub fn execute(self) -> Result<()> {
let words_fst = self.index.words_fst(&self.wtxn)?; let words_fst = self.index.words_fst(&self.wtxn)?;
let number_of_words = words_fst.len(); let number_of_words = words_fst.len();
let min_number_of_words = (number_of_words as f64 * self.threshold) as usize; let min_number_of_words = (number_of_words as f64 * self.threshold) as usize;