mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Split the update side to use the number and the strings facet databases
This commit is contained in:
parent
038e03a4e4
commit
bd7b285bae
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashSet;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use anyhow::{Context, bail};
|
use anyhow::{Context, bail};
|
||||||
@ -6,8 +6,6 @@ use regex::Regex;
|
|||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
|
|
||||||
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| {
|
static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||||
Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()
|
Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()
|
||||||
});
|
});
|
||||||
@ -33,7 +31,7 @@ pub enum Criterion {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Criterion {
|
impl Criterion {
|
||||||
pub fn from_str(faceted_attributes: &HashMap<String, FacetType>, txt: &str) -> anyhow::Result<Criterion> {
|
pub fn from_str(faceted_attributes: &HashSet<String>, txt: &str) -> anyhow::Result<Criterion> {
|
||||||
match txt {
|
match txt {
|
||||||
"words" => Ok(Criterion::Words),
|
"words" => Ok(Criterion::Words),
|
||||||
"typo" => Ok(Criterion::Typo),
|
"typo" => Ok(Criterion::Typo),
|
||||||
@ -44,7 +42,9 @@ impl Criterion {
|
|||||||
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
|
||||||
let order = caps.get(1).unwrap().as_str();
|
let order = caps.get(1).unwrap().as_str();
|
||||||
let field_name = caps.get(2).unwrap().as_str();
|
let field_name = caps.get(2).unwrap().as_str();
|
||||||
faceted_attributes.get(field_name).with_context(|| format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name))?;
|
faceted_attributes.get(field_name).with_context(|| {
|
||||||
|
format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name)
|
||||||
|
})?;
|
||||||
match order {
|
match order {
|
||||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
@ -18,24 +18,24 @@ use crate::heed_codec::facet::{
|
|||||||
FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
FacetValueStringCodec, FacetLevelValueF64Codec,
|
FacetValueStringCodec, FacetLevelValueF64Codec,
|
||||||
};
|
};
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
|
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key";
|
pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key";
|
||||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||||
pub const FACETED_DOCUMENTS_IDS_PREFIX: &str = "faceted-documents-ids";
|
|
||||||
pub const FACETED_FIELDS_KEY: &str = "faceted-fields";
|
pub const FACETED_FIELDS_KEY: &str = "faceted-fields";
|
||||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
|
||||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
|
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||||
|
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||||
|
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||||
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
||||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
|
||||||
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
||||||
pub const WORDS_FST_KEY: &str = "words-fst";
|
|
||||||
pub const STOP_WORDS_KEY: &str = "stop-words";
|
pub const STOP_WORDS_KEY: &str = "stop-words";
|
||||||
|
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
|
||||||
pub const SYNONYMS_KEY: &str = "synonyms";
|
pub const SYNONYMS_KEY: &str = "synonyms";
|
||||||
|
pub const WORDS_FST_KEY: &str = "words-fst";
|
||||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||||
const CREATED_AT_KEY: &str = "created-at";
|
const CREATED_AT_KEY: &str = "created-at";
|
||||||
const UPDATED_AT_KEY: &str = "updated-at";
|
const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
@ -321,53 +321,97 @@ impl Index {
|
|||||||
|
|
||||||
/* faceted fields */
|
/* faceted fields */
|
||||||
|
|
||||||
/// Writes the facet fields associated with their facet type or `None` if
|
/// Writes the facet fields names in the database.
|
||||||
/// the facet type is currently unknown.
|
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||||
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields_types: &HashMap<String, FacetType>) -> heed::Result<()> {
|
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields)
|
||||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields_types)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Deletes the facet fields ids associated with their facet type.
|
/// Deletes the facet fields names from the database.
|
||||||
pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||||
self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
|
self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the facet fields names associated with their facet type.
|
/// Returns the facet fields names.
|
||||||
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, FacetType>> {
|
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
|
||||||
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `faceted_fields`, but returns ids instead.
|
/// Same as `faceted_fields`, but returns ids instead; panics if a faceted field name is missing from the fields ids map.
|
||||||
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashMap<FieldId, FacetType>> {
|
pub fn faceted_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashSet<FieldId>> {
|
||||||
let faceted_fields = self.faceted_fields(rtxn)?;
|
let faceted_fields = self.faceted_fields(rtxn)?;
|
||||||
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
let fields_ids_map = self.fields_ids_map(rtxn)?;
|
||||||
let faceted_fields = faceted_fields
|
let faceted_fields = faceted_fields
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(k, v)| {
|
.map(|k| {
|
||||||
let kid = fields_ids_map
|
fields_ids_map
|
||||||
.id(k)
|
.id(k)
|
||||||
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
|
||||||
.expect("corrupted data: ");
|
.expect("corrupted data: ")
|
||||||
(kid, *v)
|
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Ok(faceted_fields)
|
Ok(faceted_fields)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* faceted documents ids */
|
/* faceted documents ids */
|
||||||
|
|
||||||
/// Writes the documents ids that are faceted under this field id.
|
/// Writes the documents ids that are faceted with numbers under this field id, keyed by `NUMBER_FACETED_DOCUMENTS_IDS_PREFIX` followed by the field id byte.
|
||||||
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: FieldId, docids: &RoaringBitmap) -> heed::Result<()> {
|
pub fn put_number_faceted_documents_ids(
|
||||||
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
&self,
|
||||||
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
wtxn: &mut RwTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
docids: &RoaringBitmap,
|
||||||
|
) -> heed::Result<()>
|
||||||
|
{
|
||||||
|
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
|
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
|
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieve all the documents ids that faceted under this field id.
|
/// Retrieves all the documents ids that are faceted with numbers under this field id, or an empty bitmap when none are stored.
|
||||||
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: FieldId) -> heed::Result<RoaringBitmap> {
|
pub fn number_faceted_documents_ids(
|
||||||
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
&self,
|
||||||
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
rtxn: &RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
) -> heed::Result<RoaringBitmap>
|
||||||
|
{
|
||||||
|
let mut buffer = [0u8; NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
|
buffer[..NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
|
.copy_from_slice(NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
|
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
||||||
|
Some(docids) => Ok(docids),
|
||||||
|
None => Ok(RoaringBitmap::new()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Writes the documents ids that are faceted with strings under this field id, keyed by `STRING_FACETED_DOCUMENTS_IDS_PREFIX` followed by the field id byte.
|
||||||
|
pub fn put_string_faceted_documents_ids(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
docids: &RoaringBitmap,
|
||||||
|
) -> heed::Result<()>
|
||||||
|
{
|
||||||
|
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
|
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
|
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
|
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retrieves all the documents ids that are faceted with strings under this field id.
|
||||||
|
pub fn string_faceted_documents_ids(
|
||||||
|
&self,
|
||||||
|
rtxn: &RoTxn,
|
||||||
|
field_id: FieldId,
|
||||||
|
) -> heed::Result<RoaringBitmap>
|
||||||
|
{
|
||||||
|
let mut buffer = [0u8; STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
|
||||||
|
buffer[..STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()]
|
||||||
|
.copy_from_slice(STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
|
||||||
*buffer.last_mut().unwrap() = field_id;
|
*buffer.last_mut().unwrap() = field_id;
|
||||||
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
|
||||||
Some(docids) => Ok(docids),
|
Some(docids) => Ok(docids),
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use anyhow::{bail, Context as _};
|
use anyhow::Context;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
@ -23,7 +23,6 @@ pub struct AscDesc<'t> {
|
|||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
field_name: String,
|
field_name: String,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
facet_type: FacetType,
|
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||||
@ -51,6 +50,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
Self::new(index, rtxn, parent, field_name, false)
|
Self::new(index, rtxn, parent, field_name, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn new(
|
fn new(
|
||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
@ -60,19 +60,19 @@ impl<'t> AscDesc<'t> {
|
|||||||
) -> anyhow::Result<Self> {
|
) -> anyhow::Result<Self> {
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let faceted_fields = index.faceted_fields(rtxn)?;
|
let faceted_fields = index.faceted_fields(rtxn)?;
|
||||||
let (field_id, facet_type) =
|
let field_id = fields_ids_map
|
||||||
field_id_facet_type(&fields_ids_map, &faceted_fields, &field_name)?;
|
.id(&field_name)
|
||||||
|
.with_context(|| format!("field {:?} isn't registered", field_name))?;
|
||||||
|
|
||||||
Ok(AscDesc {
|
Ok(AscDesc {
|
||||||
index,
|
index,
|
||||||
rtxn,
|
rtxn,
|
||||||
field_name,
|
field_name,
|
||||||
field_id,
|
field_id,
|
||||||
facet_type,
|
|
||||||
ascending,
|
ascending,
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: Box::new(std::iter::empty()),
|
candidates: Box::new(std::iter::empty()),
|
||||||
faceted_candidates: index.faceted_documents_ids(rtxn, field_id)?,
|
faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: RoaringBitmap::new(),
|
||||||
parent,
|
parent,
|
||||||
})
|
})
|
||||||
@ -165,27 +165,20 @@ fn facet_ordered<'t>(
|
|||||||
index: &'t Index,
|
index: &'t Index,
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
facet_type: FacetType,
|
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
) -> anyhow::Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||||
match facet_type {
|
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||||
FacetType::Number => {
|
let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
||||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
let iter =
|
} else {
|
||||||
iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?;
|
let facet_fn = if ascending {
|
||||||
Ok(Box::new(iter.map(Ok)) as Box<dyn Iterator<Item = _>>)
|
FacetIter::new_reducing
|
||||||
} else {
|
} else {
|
||||||
let facet_fn = if ascending {
|
FacetIter::new_reverse_reducing
|
||||||
FacetIter::new_reducing
|
};
|
||||||
} else {
|
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
||||||
FacetIter::new_reverse_reducing
|
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
||||||
};
|
|
||||||
let iter = facet_fn(rtxn, index, field_id, candidates)?;
|
|
||||||
Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids))))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
FacetType::String => bail!("criteria facet type must be a number"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
use crate::heed_codec::facet::*;
|
use crate::heed_codec::facet::*;
|
||||||
use crate::{facet::FacetType, DocumentId, FieldId, Index};
|
use crate::{DocumentId, FieldId, Index};
|
||||||
|
|
||||||
const FID_SIZE: usize = size_of::<FieldId>();
|
const FID_SIZE: usize = size_of::<FieldId>();
|
||||||
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
const DOCID_SIZE: usize = size_of::<DocumentId>();
|
||||||
@ -22,7 +22,6 @@ pub struct FacetDistinct<'a> {
|
|||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
txn: &'a heed::RoTxn<'a>,
|
txn: &'a heed::RoTxn<'a>,
|
||||||
facet_type: FacetType,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> FacetDistinct<'a> {
|
impl<'a> FacetDistinct<'a> {
|
||||||
@ -30,14 +29,9 @@ impl<'a> FacetDistinct<'a> {
|
|||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
txn: &'a heed::RoTxn<'a>,
|
txn: &'a heed::RoTxn<'a>,
|
||||||
facet_type: FacetType,
|
) -> Self
|
||||||
) -> Self {
|
{
|
||||||
Self {
|
Self { distinct, index, txn }
|
||||||
distinct,
|
|
||||||
index,
|
|
||||||
txn,
|
|
||||||
facet_type,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -45,7 +39,6 @@ pub struct FacetDistinctIter<'a> {
|
|||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
distinct: FieldId,
|
distinct: FieldId,
|
||||||
excluded: RoaringBitmap,
|
excluded: RoaringBitmap,
|
||||||
facet_type: FacetType,
|
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
iter_offset: usize,
|
iter_offset: usize,
|
||||||
txn: &'a heed::RoTxn<'a>,
|
txn: &'a heed::RoTxn<'a>,
|
||||||
@ -117,6 +110,7 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
// increasing the offset we make sure to get the first valid value for the next
|
// increasing the offset we make sure to get the first valid value for the next
|
||||||
// distinct document to keep.
|
// distinct document to keep.
|
||||||
self.iter_offset += 1;
|
self.iter_offset += 1;
|
||||||
|
|
||||||
Ok(Some(id))
|
Ok(Some(id))
|
||||||
}
|
}
|
||||||
// no more candidate at this offset, return.
|
// no more candidate at this offset, return.
|
||||||
@ -188,7 +182,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
|
|||||||
candidates,
|
candidates,
|
||||||
distinct: self.distinct,
|
distinct: self.distinct,
|
||||||
excluded,
|
excluded,
|
||||||
facet_type: self.facet_type,
|
|
||||||
index: self.index,
|
index: self.index,
|
||||||
iter_offset: 0,
|
iter_offset: 0,
|
||||||
txn: self.txn,
|
txn: self.txn,
|
||||||
|
@ -145,7 +145,7 @@ impl<'a> Search<'a> {
|
|||||||
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
let faceted_fields = self.index.faceted_fields(self.rtxn)?;
|
||||||
match faceted_fields.get(name) {
|
match faceted_fields.get(name) {
|
||||||
Some(facet_type) => {
|
Some(facet_type) => {
|
||||||
let distinct = FacetDistinct::new(id, self.index, self.rtxn, *facet_type);
|
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
|
||||||
self.perform_sort(distinct, matching_words, criteria)
|
self.perform_sort(distinct, matching_words, criteria)
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
|
@ -49,8 +49,10 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
self.index.put_fields_distribution(self.wtxn, &FieldsDistribution::default())?;
|
self.index.put_fields_distribution(self.wtxn, &FieldsDistribution::default())?;
|
||||||
|
|
||||||
// We clean all the faceted documents ids.
|
// We clean all the faceted documents ids.
|
||||||
for (field_id, _) in faceted_fields {
|
let empty = RoaringBitmap::default();
|
||||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, &RoaringBitmap::default())?;
|
for field_id in faceted_fields {
|
||||||
|
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &empty)?;
|
||||||
|
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &empty)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the other databases.
|
// Clear the other databases.
|
||||||
|
@ -330,11 +330,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Remove the documents ids from the faceted documents ids.
|
// Remove the documents ids from the faceted documents ids.
|
||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||||
for (field_id, facet_type) in faceted_fields {
|
// Remove docids from the number faceted documents ids
|
||||||
let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
|
let mut docids = self.index.number_faceted_documents_ids(self.wtxn, field_id)?;
|
||||||
docids.difference_with(&self.documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
remove_docids_from_field_id_docid_facet_value(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
@ -344,6 +344,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
|(_fid, docid, _value)| docid,
|
|(_fid, docid, _value)| docid,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
// Remove docids from the string faceted documents ids
|
||||||
|
let mut docids = self.index.string_faceted_documents_ids(self.wtxn, field_id)?;
|
||||||
|
docids.difference_with(&self.documents_ids);
|
||||||
|
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &docids)?;
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
remove_docids_from_field_id_docid_facet_value(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
field_id_docid_facet_strings,
|
field_id_docid_facet_strings,
|
||||||
|
@ -9,7 +9,6 @@ use heed::{BytesEncode, Error};
|
|||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
@ -62,56 +61,51 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||||
|
|
||||||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||||
for (field_id, facet_type) in faceted_fields {
|
|
||||||
let (content, documents_ids) = match facet_type {
|
|
||||||
FacetType::String => {
|
|
||||||
let documents_ids = compute_faceted_documents_ids(
|
|
||||||
self.wtxn,
|
|
||||||
self.index.facet_field_id_value_docids,
|
|
||||||
field_id,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
(None, documents_ids)
|
for field_id in faceted_fields {
|
||||||
},
|
// Compute and store the faceted strings documents ids.
|
||||||
FacetType::Number => {
|
let string_documents_ids = compute_faceted_documents_ids(
|
||||||
clear_field_number_levels(
|
self.wtxn,
|
||||||
self.wtxn,
|
self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
|
||||||
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
|
field_id,
|
||||||
field_id,
|
)?;
|
||||||
)?;
|
|
||||||
|
|
||||||
let documents_ids = compute_faceted_documents_ids(
|
// Clear the facet number levels.
|
||||||
self.wtxn,
|
clear_field_number_levels(
|
||||||
self.index.facet_field_id_value_docids,
|
self.wtxn,
|
||||||
field_id,
|
self.index.facet_id_f64_docids,
|
||||||
)?;
|
field_id,
|
||||||
|
)?;
|
||||||
|
|
||||||
let content = compute_facet_number_levels(
|
// Compute and store the faceted numbers documents ids.
|
||||||
self.wtxn,
|
let number_documents_ids = compute_faceted_documents_ids(
|
||||||
self.index.facet_field_id_value_docids.remap_key_type::<FacetLevelValueF64Codec>(),
|
self.wtxn,
|
||||||
self.chunk_compression_type,
|
self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
|
||||||
self.chunk_compression_level,
|
field_id,
|
||||||
self.chunk_fusing_shrink_size,
|
)?;
|
||||||
self.level_group_size,
|
|
||||||
self.min_level_size,
|
|
||||||
field_id,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
(Some(content), documents_ids)
|
let content = compute_facet_number_levels(
|
||||||
},
|
self.wtxn,
|
||||||
};
|
self.index.facet_id_f64_docids,
|
||||||
|
self.chunk_compression_type,
|
||||||
|
self.chunk_compression_level,
|
||||||
|
self.chunk_fusing_shrink_size,
|
||||||
|
self.level_group_size,
|
||||||
|
self.min_level_size,
|
||||||
|
field_id,
|
||||||
|
)?;
|
||||||
|
|
||||||
if let Some(content) = content {
|
self.index.put_string_faceted_documents_ids(self.wtxn, field_id, &string_documents_ids)?;
|
||||||
write_into_lmdb_database(
|
self.index.put_number_faceted_documents_ids(self.wtxn, field_id, &number_documents_ids)?;
|
||||||
self.wtxn,
|
|
||||||
*self.index.facet_field_id_value_docids.as_polymorph(),
|
|
||||||
content,
|
|
||||||
|_, _| anyhow::bail!("invalid facet level merging"),
|
|
||||||
WriteMethod::GetMergePut,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, &documents_ids)?;
|
// Store the computed facet number levels.
|
||||||
|
write_into_lmdb_database(
|
||||||
|
self.wtxn,
|
||||||
|
*self.index.facet_id_f64_docids.as_polymorph(),
|
||||||
|
content,
|
||||||
|
|_, _| anyhow::bail!("invalid facet number level merging"),
|
||||||
|
WriteMethod::GetMergePut,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -205,10 +199,12 @@ fn compute_faceted_documents_ids(
|
|||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
let mut documents_ids = RoaringBitmap::new();
|
let mut documents_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
for result in db.prefix_iter(rtxn, &[field_id])? {
|
for result in db.prefix_iter(rtxn, &[field_id])? {
|
||||||
let (_key, docids) = result?;
|
let (_key, docids) = result?;
|
||||||
documents_ids.union_with(&docids);
|
documents_ids |= docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(documents_ids)
|
Ok(documents_ids)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -412,7 +412,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
Main,
|
Main,
|
||||||
WordDocids,
|
WordDocids,
|
||||||
WordLevel0PositionDocids,
|
WordLevel0PositionDocids,
|
||||||
FacetLevel0ValuesDocids,
|
FacetLevel0NumbersDocids,
|
||||||
}
|
}
|
||||||
|
|
||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||||
@ -478,8 +478,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
let mut docid_word_positions_readers = Vec::with_capacity(readers.len());
|
let mut docid_word_positions_readers = Vec::with_capacity(readers.len());
|
||||||
let mut words_pairs_proximities_docids_readers = Vec::with_capacity(readers.len());
|
let mut words_pairs_proximities_docids_readers = Vec::with_capacity(readers.len());
|
||||||
let mut word_level_position_docids_readers = Vec::with_capacity(readers.len());
|
let mut word_level_position_docids_readers = Vec::with_capacity(readers.len());
|
||||||
let mut facet_field_value_docids_readers = Vec::with_capacity(readers.len());
|
let mut facet_field_numbers_docids_readers = Vec::with_capacity(readers.len());
|
||||||
let mut field_id_docid_facet_values_readers = Vec::with_capacity(readers.len());
|
let mut facet_field_strings_docids_readers = Vec::with_capacity(readers.len());
|
||||||
|
let mut field_id_docid_facet_numbers_readers = Vec::with_capacity(readers.len());
|
||||||
|
let mut field_id_docid_facet_strings_readers = Vec::with_capacity(readers.len());
|
||||||
let mut documents_readers = Vec::with_capacity(readers.len());
|
let mut documents_readers = Vec::with_capacity(readers.len());
|
||||||
readers.into_iter().for_each(|readers| {
|
readers.into_iter().for_each(|readers| {
|
||||||
let Readers {
|
let Readers {
|
||||||
@ -488,17 +490,21 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
words_pairs_proximities_docids,
|
words_pairs_proximities_docids,
|
||||||
word_level_position_docids,
|
word_level_position_docids,
|
||||||
facet_field_value_docids,
|
facet_field_numbers_docids,
|
||||||
field_id_docid_facet_values,
|
facet_field_strings_docids,
|
||||||
documents
|
field_id_docid_facet_numbers,
|
||||||
|
field_id_docid_facet_strings,
|
||||||
|
documents,
|
||||||
} = readers;
|
} = readers;
|
||||||
main_readers.push(main);
|
main_readers.push(main);
|
||||||
word_docids_readers.push(word_docids);
|
word_docids_readers.push(word_docids);
|
||||||
docid_word_positions_readers.push(docid_word_positions);
|
docid_word_positions_readers.push(docid_word_positions);
|
||||||
words_pairs_proximities_docids_readers.push(words_pairs_proximities_docids);
|
words_pairs_proximities_docids_readers.push(words_pairs_proximities_docids);
|
||||||
word_level_position_docids_readers.push(word_level_position_docids);
|
word_level_position_docids_readers.push(word_level_position_docids);
|
||||||
facet_field_value_docids_readers.push(facet_field_value_docids);
|
facet_field_numbers_docids_readers.push(facet_field_numbers_docids);
|
||||||
field_id_docid_facet_values_readers.push(field_id_docid_facet_values);
|
facet_field_strings_docids_readers.push(facet_field_strings_docids);
|
||||||
|
field_id_docid_facet_numbers_readers.push(field_id_docid_facet_numbers);
|
||||||
|
field_id_docid_facet_strings_readers.push(field_id_docid_facet_strings);
|
||||||
documents_readers.push(documents);
|
documents_readers.push(documents);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -523,8 +529,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
(DatabaseType::Main, main_readers, main_merge as MergeFn),
|
(DatabaseType::Main, main_readers, main_merge as MergeFn),
|
||||||
(DatabaseType::WordDocids, word_docids_readers, word_docids_merge),
|
(DatabaseType::WordDocids, word_docids_readers, word_docids_merge),
|
||||||
(
|
(
|
||||||
DatabaseType::FacetLevel0ValuesDocids,
|
DatabaseType::FacetLevel0NumbersDocids,
|
||||||
facet_field_value_docids_readers,
|
facet_field_numbers_docids_readers,
|
||||||
facet_field_value_docids_merge,
|
facet_field_value_docids_merge,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
@ -547,7 +553,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
docid_word_positions_readers,
|
docid_word_positions_readers,
|
||||||
documents_readers,
|
documents_readers,
|
||||||
words_pairs_proximities_docids_readers,
|
words_pairs_proximities_docids_readers,
|
||||||
field_id_docid_facet_values_readers,
|
facet_field_numbers_docids_readers,
|
||||||
|
facet_field_strings_docids_readers,
|
||||||
|
field_id_docid_facet_numbers_readers,
|
||||||
|
field_id_docid_facet_strings_readers,
|
||||||
)) as anyhow::Result<_>
|
)) as anyhow::Result<_>
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@ -556,7 +565,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
docid_word_positions_readers,
|
docid_word_positions_readers,
|
||||||
documents_readers,
|
documents_readers,
|
||||||
words_pairs_proximities_docids_readers,
|
words_pairs_proximities_docids_readers,
|
||||||
field_id_docid_facet_values_readers,
|
facet_field_numbers_docids_readers,
|
||||||
|
facet_field_strings_docids_readers,
|
||||||
|
field_id_docid_facet_numbers_readers,
|
||||||
|
field_id_docid_facet_strings_readers,
|
||||||
) = readers;
|
) = readers;
|
||||||
|
|
||||||
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
@ -624,11 +636,26 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
total_databases,
|
total_databases,
|
||||||
});
|
});
|
||||||
|
|
||||||
debug!("Writing the field id docid facet values into LMDB on disk...");
|
debug!("Writing the field id docid facet numbers into LMDB on disk...");
|
||||||
merge_into_lmdb_database(
|
merge_into_lmdb_database(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
*self.index.field_id_docid_facet_values.as_polymorph(),
|
*self.index.field_id_docid_facet_f64s.as_polymorph(),
|
||||||
field_id_docid_facet_values_readers,
|
field_id_docid_facet_numbers_readers,
|
||||||
|
field_id_docid_facet_values_merge,
|
||||||
|
write_method,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
database_count += 1;
|
||||||
|
progress_callback(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
|
databases_seen: database_count,
|
||||||
|
total_databases,
|
||||||
|
});
|
||||||
|
|
||||||
|
debug!("Writing the field id docid facet strings into LMDB on disk...");
|
||||||
|
merge_into_lmdb_database(
|
||||||
|
self.wtxn,
|
||||||
|
*self.index.field_id_docid_facet_strings.as_polymorph(),
|
||||||
|
field_id_docid_facet_strings_readers,
|
||||||
field_id_docid_facet_values_merge,
|
field_id_docid_facet_values_merge,
|
||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
@ -678,9 +705,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
write_method,
|
write_method,
|
||||||
)?;
|
)?;
|
||||||
},
|
},
|
||||||
DatabaseType::FacetLevel0ValuesDocids => {
|
DatabaseType::FacetLevel0NumbersDocids => {
|
||||||
debug!("Writing the facet level 0 values docids into LMDB on disk...");
|
debug!("Writing the facet numbers docids into LMDB on disk...");
|
||||||
let db = *self.index.facet_field_id_value_docids.as_polymorph();
|
let db = *self.index.facet_id_f64_docids.as_polymorph();
|
||||||
write_into_lmdb_database(
|
write_into_lmdb_database(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
db,
|
db,
|
||||||
|
@ -6,25 +6,24 @@ use std::iter::FromIterator;
|
|||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{cmp, iter};
|
use std::{cmp, iter};
|
||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::Context;
|
||||||
use bstr::ByteSlice as _;
|
use bstr::ByteSlice as _;
|
||||||
use fst::Set;
|
use fst::Set;
|
||||||
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
|
use grenad::{Reader, FileFuse, Writer, Sorter, CompressionType};
|
||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
use linked_hash_map::LinkedHashMap;
|
use linked_hash_map::LinkedHashMap;
|
||||||
use log::{debug, info, warn};
|
use log::{debug, info};
|
||||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind};
|
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token, TokenKind, token::SeparatorKind};
|
||||||
use ordered_float::OrderedFloat;
|
use ordered_float::OrderedFloat;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tempfile::tempfile;
|
use tempfile::tempfile;
|
||||||
|
|
||||||
use crate::facet::{FacetType, FacetValue};
|
|
||||||
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec};
|
||||||
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec};
|
||||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||||
use crate::update::UpdateIndexingStep;
|
use crate::update::UpdateIndexingStep;
|
||||||
use crate::{json_to_string, SmallVec8, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap};
|
use crate::{json_to_string, SmallVec32, Position, DocumentId, FieldId, FieldsIdsMap};
|
||||||
|
|
||||||
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
use super::{MergeFn, create_writer, create_sorter, writer_into_reader};
|
||||||
use super::merge_function::{
|
use super::merge_function::{
|
||||||
@ -45,8 +44,10 @@ pub struct Readers {
|
|||||||
pub docid_word_positions: Reader<FileFuse>,
|
pub docid_word_positions: Reader<FileFuse>,
|
||||||
pub words_pairs_proximities_docids: Reader<FileFuse>,
|
pub words_pairs_proximities_docids: Reader<FileFuse>,
|
||||||
pub word_level_position_docids: Reader<FileFuse>,
|
pub word_level_position_docids: Reader<FileFuse>,
|
||||||
pub facet_field_value_docids: Reader<FileFuse>,
|
pub facet_field_numbers_docids: Reader<FileFuse>,
|
||||||
pub field_id_docid_facet_values: Reader<FileFuse>,
|
pub facet_field_strings_docids: Reader<FileFuse>,
|
||||||
|
pub field_id_docid_facet_numbers: Reader<FileFuse>,
|
||||||
|
pub field_id_docid_facet_strings: Reader<FileFuse>,
|
||||||
pub documents: Reader<FileFuse>,
|
pub documents: Reader<FileFuse>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,13 +56,14 @@ pub struct Store<'s, A> {
|
|||||||
primary_key: String,
|
primary_key: String,
|
||||||
fields_ids_map: FieldsIdsMap,
|
fields_ids_map: FieldsIdsMap,
|
||||||
searchable_fields: HashSet<FieldId>,
|
searchable_fields: HashSet<FieldId>,
|
||||||
faceted_fields: HashMap<FieldId, FacetType>,
|
faceted_fields: HashSet<FieldId>,
|
||||||
// Caches
|
// Caches
|
||||||
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
word_docids: LinkedHashMap<SmallVec32<u8>, RoaringBitmap>,
|
||||||
word_docids_limit: usize,
|
word_docids_limit: usize,
|
||||||
words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
words_pairs_proximities_docids: LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
||||||
words_pairs_proximities_docids_limit: usize,
|
words_pairs_proximities_docids_limit: usize,
|
||||||
facet_field_value_docids: LinkedHashMap<(u8, FacetValue), RoaringBitmap>,
|
facet_field_number_docids: LinkedHashMap<(FieldId, OrderedFloat<f64>), RoaringBitmap>,
|
||||||
|
facet_field_string_docids: LinkedHashMap<(FieldId, String), RoaringBitmap>,
|
||||||
facet_field_value_docids_limit: usize,
|
facet_field_value_docids_limit: usize,
|
||||||
// MTBL parameters
|
// MTBL parameters
|
||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
@ -72,8 +74,10 @@ pub struct Store<'s, A> {
|
|||||||
word_docids_sorter: Sorter<MergeFn>,
|
word_docids_sorter: Sorter<MergeFn>,
|
||||||
words_pairs_proximities_docids_sorter: Sorter<MergeFn>,
|
words_pairs_proximities_docids_sorter: Sorter<MergeFn>,
|
||||||
word_level_position_docids_sorter: Sorter<MergeFn>,
|
word_level_position_docids_sorter: Sorter<MergeFn>,
|
||||||
facet_field_value_docids_sorter: Sorter<MergeFn>,
|
facet_field_numbers_docids_sorter: Sorter<MergeFn>,
|
||||||
field_id_docid_facet_values_sorter: Sorter<MergeFn>,
|
facet_field_strings_docids_sorter: Sorter<MergeFn>,
|
||||||
|
field_id_docid_facet_numbers_sorter: Sorter<MergeFn>,
|
||||||
|
field_id_docid_facet_strings_sorter: Sorter<MergeFn>,
|
||||||
// MTBL writers
|
// MTBL writers
|
||||||
docid_word_positions_writer: Writer<File>,
|
docid_word_positions_writer: Writer<File>,
|
||||||
documents_writer: Writer<File>,
|
documents_writer: Writer<File>,
|
||||||
@ -86,7 +90,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
primary_key: String,
|
primary_key: String,
|
||||||
fields_ids_map: FieldsIdsMap,
|
fields_ids_map: FieldsIdsMap,
|
||||||
searchable_fields: HashSet<FieldId>,
|
searchable_fields: HashSet<FieldId>,
|
||||||
faceted_fields: HashMap<FieldId, FacetType>,
|
faceted_fields: HashSet<FieldId>,
|
||||||
linked_hash_map_size: Option<usize>,
|
linked_hash_map_size: Option<usize>,
|
||||||
max_nb_chunks: Option<usize>,
|
max_nb_chunks: Option<usize>,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
@ -132,7 +136,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_nb_chunks,
|
max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let facet_field_value_docids_sorter = create_sorter(
|
let facet_field_numbers_docids_sorter = create_sorter(
|
||||||
facet_field_value_docids_merge,
|
facet_field_value_docids_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
@ -140,7 +144,23 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
max_nb_chunks,
|
max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
);
|
);
|
||||||
let field_id_docid_facet_values_sorter = create_sorter(
|
let facet_field_strings_docids_sorter = create_sorter(
|
||||||
|
facet_field_value_docids_merge,
|
||||||
|
chunk_compression_type,
|
||||||
|
chunk_compression_level,
|
||||||
|
chunk_fusing_shrink_size,
|
||||||
|
max_nb_chunks,
|
||||||
|
max_memory,
|
||||||
|
);
|
||||||
|
let field_id_docid_facet_numbers_sorter = create_sorter(
|
||||||
|
field_id_docid_facet_values_merge,
|
||||||
|
chunk_compression_type,
|
||||||
|
chunk_compression_level,
|
||||||
|
chunk_fusing_shrink_size,
|
||||||
|
max_nb_chunks,
|
||||||
|
Some(1024 * 1024 * 1024), // 1MB
|
||||||
|
);
|
||||||
|
let field_id_docid_facet_strings_sorter = create_sorter(
|
||||||
field_id_docid_facet_values_merge,
|
field_id_docid_facet_values_merge,
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
chunk_compression_level,
|
chunk_compression_level,
|
||||||
@ -173,7 +193,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
word_docids_limit: linked_hash_map_size,
|
word_docids_limit: linked_hash_map_size,
|
||||||
words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
words_pairs_proximities_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||||
words_pairs_proximities_docids_limit: linked_hash_map_size,
|
words_pairs_proximities_docids_limit: linked_hash_map_size,
|
||||||
facet_field_value_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
facet_field_number_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||||
|
facet_field_string_docids: LinkedHashMap::with_capacity(linked_hash_map_size),
|
||||||
facet_field_value_docids_limit: linked_hash_map_size,
|
facet_field_value_docids_limit: linked_hash_map_size,
|
||||||
// MTBL parameters
|
// MTBL parameters
|
||||||
chunk_compression_type,
|
chunk_compression_type,
|
||||||
@ -184,8 +205,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
word_docids_sorter,
|
word_docids_sorter,
|
||||||
words_pairs_proximities_docids_sorter,
|
words_pairs_proximities_docids_sorter,
|
||||||
word_level_position_docids_sorter,
|
word_level_position_docids_sorter,
|
||||||
facet_field_value_docids_sorter,
|
facet_field_numbers_docids_sorter,
|
||||||
field_id_docid_facet_values_sorter,
|
facet_field_strings_docids_sorter,
|
||||||
|
field_id_docid_facet_numbers_sorter,
|
||||||
|
field_id_docid_facet_strings_sorter,
|
||||||
// MTBL writers
|
// MTBL writers
|
||||||
docid_word_positions_writer,
|
docid_word_positions_writer,
|
||||||
documents_writer,
|
documents_writer,
|
||||||
@ -215,34 +238,68 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the documents ids under the facet field id and value we have seen it.
|
fn insert_facet_number_values_docid(
|
||||||
fn insert_facet_values_docid(
|
|
||||||
&mut self,
|
&mut self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
field_value: FacetValue,
|
value: OrderedFloat<f64>,
|
||||||
id: DocumentId,
|
id: DocumentId,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
Self::write_field_id_docid_facet_value(&mut self.field_id_docid_facet_values_sorter, field_id, id, &field_value)?;
|
let sorter = &mut self.field_id_docid_facet_numbers_sorter;
|
||||||
|
Self::write_field_id_docid_facet_number_value(sorter, field_id, id, value)?;
|
||||||
|
|
||||||
let key = (field_id, field_value);
|
let key = (field_id, value);
|
||||||
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
||||||
match self.facet_field_value_docids.get_refresh(&key) {
|
match self.facet_field_number_docids.get_refresh(&key) {
|
||||||
Some(old) => { old.insert(id); },
|
Some(old) => { old.insert(id); },
|
||||||
None => {
|
None => {
|
||||||
// A newly inserted element is append at the end of the linked hash map.
|
// A newly inserted element is append at the end of the linked hash map.
|
||||||
self.facet_field_value_docids.insert(key, RoaringBitmap::from_iter(Some(id)));
|
self.facet_field_number_docids.insert(key, RoaringBitmap::from_iter(Some(id)));
|
||||||
// If the word docids just reached it's capacity we must make sure to remove
|
// If the word docids just reached it's capacity we must make sure to remove
|
||||||
// one element, this way next time we insert we doesn't grow the capacity.
|
// one element, this way next time we insert we doesn't grow the capacity.
|
||||||
if self.facet_field_value_docids.len() == self.facet_field_value_docids_limit {
|
if self.facet_field_number_docids.len() == self.facet_field_value_docids_limit {
|
||||||
// Removing the front element is equivalent to removing the LRU element.
|
// Removing the front element is equivalent to removing the LRU element.
|
||||||
Self::write_facet_field_value_docids(
|
Self::write_facet_field_number_docids(
|
||||||
&mut self.facet_field_value_docids_sorter,
|
&mut self.facet_field_numbers_docids_sorter,
|
||||||
self.facet_field_value_docids.pop_front(),
|
self.facet_field_number_docids.pop_front(),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the documents ids under the facet field id and value we have seen it.
|
||||||
|
fn insert_facet_string_values_docid(
|
||||||
|
&mut self,
|
||||||
|
field_id: FieldId,
|
||||||
|
value: String,
|
||||||
|
id: DocumentId,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
{
|
||||||
|
let sorter = &mut self.field_id_docid_facet_strings_sorter;
|
||||||
|
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
|
||||||
|
|
||||||
|
let key = (field_id, value);
|
||||||
|
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
||||||
|
match self.facet_field_string_docids.get_refresh(&key) {
|
||||||
|
Some(old) => { old.insert(id); },
|
||||||
|
None => {
|
||||||
|
// A newly inserted element is append at the end of the linked hash map.
|
||||||
|
self.facet_field_string_docids.insert(key, RoaringBitmap::from_iter(Some(id)));
|
||||||
|
// If the word docids just reached it's capacity we must make sure to remove
|
||||||
|
// one element, this way next time we insert we doesn't grow the capacity.
|
||||||
|
if self.facet_field_string_docids.len() == self.facet_field_value_docids_limit {
|
||||||
|
// Removing the front element is equivalent to removing the LRU element.
|
||||||
|
Self::write_facet_field_string_docids(
|
||||||
|
&mut self.facet_field_strings_docids_sorter,
|
||||||
|
self.facet_field_string_docids.pop_front(),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -287,7 +344,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
words_positions: &mut HashMap<String, SmallVec32<Position>>,
|
words_positions: &mut HashMap<String, SmallVec32<Position>>,
|
||||||
facet_values: &mut HashMap<FieldId, SmallVec8<FacetValue>>,
|
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
|
||||||
|
facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
|
||||||
record: &[u8],
|
record: &[u8],
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
@ -306,10 +364,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
|
|
||||||
words_positions.clear();
|
words_positions.clear();
|
||||||
|
|
||||||
// We store document_id associated with all the field id and values.
|
// We store document_id associated with all the facet numbers fields ids and values.
|
||||||
for (field, values) in facet_values.drain() {
|
for (field, values) in facet_numbers_values.drain() {
|
||||||
for value in values {
|
for value in values {
|
||||||
self.insert_facet_values_docid(field, value, document_id)?;
|
let value = OrderedFloat::from(value);
|
||||||
|
self.insert_facet_number_values_docid(field, value, document_id)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We store document_id associated with all the facet strings fields ids and values.
|
||||||
|
for (field, values) in facet_strings_values.drain() {
|
||||||
|
for value in values {
|
||||||
|
self.insert_facet_string_values_docid(field, value, document_id)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -409,20 +475,16 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_facet_field_value_docids<I>(
|
fn write_facet_field_string_docids<I>(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn>,
|
||||||
iter: I,
|
iter: I,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
where I: IntoIterator<Item=((FieldId, FacetValue), RoaringBitmap)>
|
where I: IntoIterator<Item=((FieldId, String), RoaringBitmap)>
|
||||||
{
|
{
|
||||||
use FacetValue::*;
|
|
||||||
|
|
||||||
for ((field_id, value), docids) in iter {
|
for ((field_id, value), docids) in iter {
|
||||||
let result = match value {
|
let key = FacetValueStringCodec::bytes_encode(&(field_id, &value))
|
||||||
String(s) => FacetValueStringCodec::bytes_encode(&(field_id, &s)).map(Cow::into_owned),
|
.map(Cow::into_owned)
|
||||||
Number(f) => FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *f, *f)).map(Cow::into_owned),
|
.context("could not serialize facet key")?;
|
||||||
};
|
|
||||||
let key = result.context("could not serialize facet key")?;
|
|
||||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||||
.context("could not serialize docids")?;
|
.context("could not serialize docids")?;
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
@ -433,21 +495,55 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_field_id_docid_facet_value(
|
fn write_facet_field_number_docids<I>(
|
||||||
|
sorter: &mut Sorter<MergeFn>,
|
||||||
|
iter: I,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
where I: IntoIterator<Item=((FieldId, OrderedFloat<f64>), RoaringBitmap)>
|
||||||
|
{
|
||||||
|
for ((field_id, value), docids) in iter {
|
||||||
|
let key = FacetLevelValueF64Codec::bytes_encode(&(field_id, 0, *value, *value))
|
||||||
|
.map(Cow::into_owned)
|
||||||
|
.context("could not serialize facet key")?;
|
||||||
|
let bytes = CboRoaringBitmapCodec::bytes_encode(&docids)
|
||||||
|
.context("could not serialize docids")?;
|
||||||
|
if lmdb_key_valid_size(&key) {
|
||||||
|
sorter.insert(&key, &bytes)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_field_id_docid_facet_number_value(
|
||||||
sorter: &mut Sorter<MergeFn>,
|
sorter: &mut Sorter<MergeFn>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
document_id: DocumentId,
|
document_id: DocumentId,
|
||||||
value: &FacetValue,
|
value: OrderedFloat<f64>,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
{
|
{
|
||||||
use FacetValue::*;
|
let key = FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, *value))
|
||||||
|
.map(Cow::into_owned)
|
||||||
|
.context("could not serialize facet key")?;
|
||||||
|
|
||||||
let result = match value {
|
if lmdb_key_valid_size(&key) {
|
||||||
String(s) => FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, s)).map(Cow::into_owned),
|
sorter.insert(&key, &[])?;
|
||||||
Number(f) => FieldDocIdFacetF64Codec::bytes_encode(&(field_id, document_id, **f)).map(Cow::into_owned),
|
}
|
||||||
};
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_field_id_docid_facet_string_value(
|
||||||
|
sorter: &mut Sorter<MergeFn>,
|
||||||
|
field_id: FieldId,
|
||||||
|
document_id: DocumentId,
|
||||||
|
value: &str,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
{
|
||||||
|
let key = FieldDocIdFacetStringCodec::bytes_encode(&(field_id, document_id, value))
|
||||||
|
.map(Cow::into_owned)
|
||||||
|
.context("could not serialize facet key")?;
|
||||||
|
|
||||||
let key = result.context("could not serialize facet key")?;
|
|
||||||
if lmdb_key_valid_size(&key) {
|
if lmdb_key_valid_size(&key) {
|
||||||
sorter.insert(&key, &[])?;
|
sorter.insert(&key, &[])?;
|
||||||
}
|
}
|
||||||
@ -493,7 +589,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
|
|
||||||
let mut before = Instant::now();
|
let mut before = Instant::now();
|
||||||
let mut words_positions = HashMap::new();
|
let mut words_positions = HashMap::new();
|
||||||
let mut facet_values = HashMap::new();
|
let mut facet_numbers_values = HashMap::new();
|
||||||
|
let mut facet_strings_values = HashMap::new();
|
||||||
|
|
||||||
let mut count: usize = 0;
|
let mut count: usize = 0;
|
||||||
while let Some((key, value)) = documents.next()? {
|
while let Some((key, value)) = documents.next()? {
|
||||||
@ -513,32 +610,12 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (attr, content) in document.iter() {
|
for (attr, content) in document.iter() {
|
||||||
if self.faceted_fields.contains_key(&attr) || self.searchable_fields.contains(&attr) {
|
if self.faceted_fields.contains(&attr) || self.searchable_fields.contains(&attr) {
|
||||||
let value = serde_json::from_slice(content)?;
|
let value = serde_json::from_slice(content)?;
|
||||||
|
|
||||||
if let Some(ftype) = self.faceted_fields.get(&attr) {
|
let (facet_numbers, facet_strings) = extract_facet_values(&value);
|
||||||
let mut values = match parse_facet_value(*ftype, &value) {
|
facet_numbers_values.entry(attr).or_insert_with(Vec::new).extend(facet_numbers);
|
||||||
Ok(values) => values,
|
facet_strings_values.entry(attr).or_insert_with(Vec::new).extend(facet_strings);
|
||||||
Err(e) => {
|
|
||||||
// We extract the name of the attribute and the document id
|
|
||||||
// to help users debug a facet type conversion.
|
|
||||||
let attr_name = self.fields_ids_map.name(attr).unwrap();
|
|
||||||
let document_id: Value = self.fields_ids_map.id(&self.primary_key)
|
|
||||||
.and_then(|fid| document.get(fid))
|
|
||||||
.map(serde_json::from_slice)
|
|
||||||
.unwrap()?;
|
|
||||||
|
|
||||||
let context = format!(
|
|
||||||
"while extracting facet from the {:?} attribute in the {} document",
|
|
||||||
attr_name, document_id,
|
|
||||||
);
|
|
||||||
warn!("{}", e.context(context));
|
|
||||||
|
|
||||||
SmallVec8::default()
|
|
||||||
},
|
|
||||||
};
|
|
||||||
facet_values.entry(attr).or_insert_with(SmallVec8::new).extend(values.drain(..));
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.searchable_fields.contains(&attr) {
|
if self.searchable_fields.contains(&attr) {
|
||||||
let content = match json_to_string(&value) {
|
let content = match json_to_string(&value) {
|
||||||
@ -558,7 +635,13 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We write the document in the documents store.
|
// We write the document in the documents store.
|
||||||
self.write_document(document_id, &mut words_positions, &mut facet_values, value)?;
|
self.write_document(
|
||||||
|
document_id,
|
||||||
|
&mut words_positions,
|
||||||
|
&mut facet_numbers_values,
|
||||||
|
&mut facet_strings_values,
|
||||||
|
value,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute the document id of the next document.
|
// Compute the document id of the next document.
|
||||||
@ -585,9 +668,14 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
&mut self.words_pairs_proximities_docids_sorter,
|
&mut self.words_pairs_proximities_docids_sorter,
|
||||||
self.words_pairs_proximities_docids,
|
self.words_pairs_proximities_docids,
|
||||||
)?;
|
)?;
|
||||||
Self::write_facet_field_value_docids(
|
Self::write_facet_field_number_docids(
|
||||||
&mut self.facet_field_value_docids_sorter,
|
&mut self.facet_field_numbers_docids_sorter,
|
||||||
self.facet_field_value_docids,
|
self.facet_field_number_docids,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Self::write_facet_field_string_docids(
|
||||||
|
&mut self.facet_field_strings_docids_sorter,
|
||||||
|
self.facet_field_string_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut word_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
let mut word_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
@ -613,18 +701,26 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
let mut word_level_position_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
let mut word_level_position_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
self.word_level_position_docids_sorter.write_into(&mut word_level_position_docids_wtr)?;
|
self.word_level_position_docids_sorter.write_into(&mut word_level_position_docids_wtr)?;
|
||||||
|
|
||||||
let mut facet_field_value_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
let mut facet_field_numbers_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
self.facet_field_value_docids_sorter.write_into(&mut facet_field_value_docids_wtr)?;
|
self.facet_field_numbers_docids_sorter.write_into(&mut facet_field_numbers_docids_wtr)?;
|
||||||
|
|
||||||
let mut field_id_docid_facet_values_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
let mut facet_field_strings_docids_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
self.field_id_docid_facet_values_sorter.write_into(&mut field_id_docid_facet_values_wtr)?;
|
self.facet_field_strings_docids_sorter.write_into(&mut facet_field_strings_docids_wtr)?;
|
||||||
|
|
||||||
|
let mut field_id_docid_facet_numbers_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
|
self.field_id_docid_facet_numbers_sorter.write_into(&mut field_id_docid_facet_numbers_wtr)?;
|
||||||
|
|
||||||
|
let mut field_id_docid_facet_strings_wtr = tempfile().and_then(|f| create_writer(comp_type, comp_level, f))?;
|
||||||
|
self.field_id_docid_facet_strings_sorter.write_into(&mut field_id_docid_facet_strings_wtr)?;
|
||||||
|
|
||||||
let main = writer_into_reader(main_wtr, shrink_size)?;
|
let main = writer_into_reader(main_wtr, shrink_size)?;
|
||||||
let word_docids = writer_into_reader(word_docids_wtr, shrink_size)?;
|
let word_docids = writer_into_reader(word_docids_wtr, shrink_size)?;
|
||||||
let words_pairs_proximities_docids = writer_into_reader(words_pairs_proximities_docids_wtr, shrink_size)?;
|
let words_pairs_proximities_docids = writer_into_reader(words_pairs_proximities_docids_wtr, shrink_size)?;
|
||||||
let word_level_position_docids = writer_into_reader(word_level_position_docids_wtr, shrink_size)?;
|
let word_level_position_docids = writer_into_reader(word_level_position_docids_wtr, shrink_size)?;
|
||||||
let facet_field_value_docids = writer_into_reader(facet_field_value_docids_wtr, shrink_size)?;
|
let facet_field_numbers_docids = writer_into_reader(facet_field_numbers_docids_wtr, shrink_size)?;
|
||||||
let field_id_docid_facet_values = writer_into_reader(field_id_docid_facet_values_wtr, shrink_size)?;
|
let facet_field_strings_docids = writer_into_reader(facet_field_strings_docids_wtr, shrink_size)?;
|
||||||
|
let field_id_docid_facet_numbers = writer_into_reader(field_id_docid_facet_numbers_wtr, shrink_size)?;
|
||||||
|
let field_id_docid_facet_strings = writer_into_reader(field_id_docid_facet_strings_wtr, shrink_size)?;
|
||||||
let docid_word_positions = writer_into_reader(self.docid_word_positions_writer, shrink_size)?;
|
let docid_word_positions = writer_into_reader(self.docid_word_positions_writer, shrink_size)?;
|
||||||
let documents = writer_into_reader(self.documents_writer, shrink_size)?;
|
let documents = writer_into_reader(self.documents_writer, shrink_size)?;
|
||||||
|
|
||||||
@ -634,8 +730,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
|||||||
docid_word_positions,
|
docid_word_positions,
|
||||||
words_pairs_proximities_docids,
|
words_pairs_proximities_docids,
|
||||||
word_level_position_docids,
|
word_level_position_docids,
|
||||||
facet_field_value_docids,
|
facet_field_numbers_docids,
|
||||||
field_id_docid_facet_values,
|
facet_field_strings_docids,
|
||||||
|
field_id_docid_facet_numbers,
|
||||||
|
field_id_docid_facet_strings,
|
||||||
documents,
|
documents,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -710,71 +808,36 @@ fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<
|
|||||||
.filter(|(_, t)| t.is_word())
|
.filter(|(_, t)| t.is_word())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_facet_value(ftype: FacetType, value: &Value) -> anyhow::Result<SmallVec8<FacetValue>> {
|
fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<String>) {
|
||||||
use FacetValue::*;
|
fn inner_extract_facet_values(
|
||||||
|
|
||||||
fn inner_parse_facet_value(
|
|
||||||
ftype: FacetType,
|
|
||||||
value: &Value,
|
value: &Value,
|
||||||
can_recurse: bool,
|
can_recurse: bool,
|
||||||
output: &mut SmallVec8<FacetValue>,
|
output_numbers: &mut Vec<f64>,
|
||||||
) -> anyhow::Result<()>
|
output_strings: &mut Vec<String>,
|
||||||
{
|
) {
|
||||||
match value {
|
match value {
|
||||||
Value::Null => Ok(()),
|
Value::Null => (),
|
||||||
Value::Bool(b) => match ftype {
|
Value::Bool(b) => output_strings.push(b.to_string()),
|
||||||
FacetType::String => {
|
Value::Number(number) => if let Some(float) = number.as_f64() {
|
||||||
output.push(String(b.to_string()));
|
output_numbers.push(float);
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
FacetType::Number => {
|
|
||||||
output.push(Number(OrderedFloat(if *b { 1.0 } else { 0.0 })));
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Value::Number(number) => match ftype {
|
|
||||||
FacetType::String => {
|
|
||||||
output.push(String(number.to_string()));
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
FacetType::Number => match number.as_f64() {
|
|
||||||
Some(float) => {
|
|
||||||
output.push(Number(OrderedFloat(float)));
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
None => bail!("invalid facet type, expecting {} found number", ftype),
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
Value::String(string) => {
|
Value::String(string) => {
|
||||||
// TODO must be normalized and not only lowercased.
|
// TODO must be normalized and not only lowercased.
|
||||||
let string = string.trim().to_lowercase();
|
let string = string.trim().to_lowercase();
|
||||||
match ftype {
|
output_strings.push(string);
|
||||||
FacetType::String => {
|
|
||||||
output.push(String(string));
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
FacetType::Number => match string.parse() {
|
|
||||||
Ok(float) => {
|
|
||||||
output.push(Number(OrderedFloat(float)));
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
Err(_err) => bail!("invalid facet type, expecting {} found string", ftype),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
Value::Array(values) => if can_recurse {
|
Value::Array(values) => if can_recurse {
|
||||||
values.iter().map(|v| inner_parse_facet_value(ftype, v, false, output)).collect()
|
for value in values {
|
||||||
} else {
|
inner_extract_facet_values(value, false, output_numbers, output_strings);
|
||||||
bail!(
|
}
|
||||||
"invalid facet type, expecting {} found array (recursive arrays are not supported)",
|
|
||||||
ftype,
|
|
||||||
);
|
|
||||||
},
|
},
|
||||||
Value::Object(_) => bail!("invalid facet type, expecting {} found object", ftype),
|
Value::Object(_) => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut facet_values = SmallVec8::new();
|
let mut facet_number_values = Vec::new();
|
||||||
inner_parse_facet_value(ftype, value, true, &mut facet_values)?;
|
let mut facet_string_values = Vec::new();
|
||||||
Ok(facet_values)
|
inner_extract_facet_values(value, true, &mut facet_number_values, &mut facet_string_values);
|
||||||
|
|
||||||
|
(facet_number_values, facet_string_values)
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::collections::{BTreeSet, HashMap};
|
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
@ -11,7 +11,6 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
|||||||
|
|
||||||
use crate::{FieldsIdsMap, Index};
|
use crate::{FieldsIdsMap, Index};
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
use crate::update::index_documents::{IndexDocumentsMethod, Transform};
|
||||||
|
|
||||||
@ -68,7 +67,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
|
|
||||||
searchable_fields: Setting<Vec<String>>,
|
searchable_fields: Setting<Vec<String>>,
|
||||||
displayed_fields: Setting<Vec<String>>,
|
displayed_fields: Setting<Vec<String>>,
|
||||||
faceted_fields: Setting<HashMap<String, String>>,
|
faceted_fields: Setting<HashSet<String>>,
|
||||||
criteria: Setting<Vec<String>>,
|
criteria: Setting<Vec<String>>,
|
||||||
stop_words: Setting<BTreeSet<String>>,
|
stop_words: Setting<BTreeSet<String>>,
|
||||||
distinct_attribute: Setting<String>,
|
distinct_attribute: Setting<String>,
|
||||||
@ -123,7 +122,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.faceted_fields = Setting::Reset;
|
self.faceted_fields = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_faceted_fields(&mut self, names_facet_types: HashMap<String, String>) {
|
pub fn set_faceted_fields(&mut self, names_facet_types: HashSet<String>) {
|
||||||
self.faceted_fields = Setting::Set(names_facet_types);
|
self.faceted_fields = Setting::Set(names_facet_types);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -387,11 +386,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
match self.faceted_fields {
|
match self.faceted_fields {
|
||||||
Setting::Set(ref fields) => {
|
Setting::Set(ref fields) => {
|
||||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
let mut new_facets = HashMap::new();
|
let mut new_facets = HashSet::new();
|
||||||
for (name, ty) in fields {
|
for name in fields {
|
||||||
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
fields_ids_map.insert(name).context("field id limit exceeded")?;
|
||||||
let ty = FacetType::from_str(&ty)?;
|
new_facets.insert(name.clone());
|
||||||
new_facets.insert(name.clone(), ty);
|
|
||||||
}
|
}
|
||||||
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user