Use the De Morgan law to simplify the NOT operation

This commit is contained in:
Clément Renault 2020-11-23 13:08:57 +01:00
parent 7370ef8c5e
commit fc686aaca7
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
9 changed files with 182 additions and 58 deletions

View File

@ -197,7 +197,7 @@ enum UpdateMeta {
DocumentsAddition { method: String, format: String },
ClearDocuments,
Settings(Settings),
FacetLevels(FacetLevels),
Facets(Facets),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -236,7 +236,7 @@ struct Settings {
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
struct FacetLevels {
struct Facets {
last_level_size: Option<NonZeroUsize>,
number_of_levels: Option<NonZeroUsize>,
easing_function: Option<String>,
@ -411,10 +411,10 @@ async fn main() -> anyhow::Result<()> {
Err(e) => Err(e.into())
}
},
UpdateMeta::FacetLevels(levels) => {
UpdateMeta::Facets(levels) => {
// We must use the write transaction of the update here.
let mut wtxn = index_cloned.write_txn()?;
let mut builder = update_builder.facet_levels(&mut wtxn, &index_cloned);
let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
if let Some(value) = levels.last_level_size {
builder.last_level_size(value);
}
@ -806,8 +806,8 @@ async fn main() -> anyhow::Result<()> {
let change_facet_levels_route = warp::filters::method::post()
.and(warp::path!("facet-levels"))
.and(warp::body::json())
.map(move |levels: FacetLevels| {
let meta = UpdateMeta::FacetLevels(levels);
.map(move |levels: Facets| {
let meta = UpdateMeta::Facets(levels);
let update_id = update_store_cloned.register_update(&meta, &[]).unwrap();
let _ = update_status_sender_cloned.send(UpdateStatus::Pending { update_id, meta });
eprintln!("update {} registered", update_id);

View File

@ -18,6 +18,7 @@ use crate::{
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FACETED_DOCUMENTS_IDS_PREFIX: &str = "faceted-documents-ids";
pub const FACETED_FIELDS_KEY: &str = "faceted-fields";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const PRIMARY_KEY_KEY: &str = "primary-key";
@ -224,6 +225,27 @@ impl Index {
Ok(self.main.get::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY)?.unwrap_or_default())
}
/* faceted documents ids */
/// Writes the documents ids that are faceted under this field id.
pub fn put_faceted_documents_ids(&self, wtxn: &mut RwTxn, field_id: u8, docids: &RoaringBitmap) -> heed::Result<()> {
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids)
}
/// Retrieve all the documents ids that faceted under this field id.
pub fn faceted_documents_ids(&self, rtxn: &RoTxn, field_id: u8) -> heed::Result<RoaringBitmap> {
let mut buffer = [0u8; FACETED_DOCUMENTS_IDS_PREFIX.len() + 1];
buffer[..FACETED_DOCUMENTS_IDS_PREFIX.len()].clone_from_slice(FACETED_DOCUMENTS_IDS_PREFIX.as_bytes());
*buffer.last_mut().unwrap() = field_id;
match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new()),
}
}
/* words fst */
/// Writes the FST which is the words dictionnary of the engine.

View File

@ -26,15 +26,42 @@ mod parser;
pub enum FacetNumberOperator<T> {
GreaterThan(T),
GreaterThanOrEqual(T),
Equal(T),
NotEqual(T),
LowerThan(T),
LowerThanOrEqual(T),
Equal(T),
Between(T, T),
}
impl<T> FacetNumberOperator<T> {
/// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`).
fn negate(self) -> (Self, Option<Self>) {
match self {
GreaterThan(x) => (LowerThanOrEqual(x), None),
GreaterThanOrEqual(x) => (LowerThan(x), None),
Equal(x) => (NotEqual(x), None),
NotEqual(x) => (Equal(x), None),
LowerThan(x) => (GreaterThanOrEqual(x), None),
LowerThanOrEqual(x) => (GreaterThan(x), None),
Between(x, y) => (LowerThan(x), Some(GreaterThan(y))),
}
}
}
#[derive(Debug, Clone, PartialEq)]
pub enum FacetStringOperator {
Equal(String),
NotEqual(String),
}
impl FacetStringOperator {
fn negate(self) -> Self {
match self {
FacetStringOperator::Equal(x) => FacetStringOperator::NotEqual(x),
FacetStringOperator::NotEqual(x) => FacetStringOperator::Equal(x),
}
}
}
#[derive(Debug, Clone, PartialEq)]
@ -44,7 +71,6 @@ pub enum FacetCondition {
OperatorString(u8, FacetStringOperator),
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
Not(Box<Self>),
}
fn get_field_id_facet_type<'a>(
@ -106,24 +132,24 @@ impl FacetCondition {
fim: &FieldsIdsMap,
ff: &HashMap<u8, FacetType>,
expression: Pairs<Rule>,
) -> anyhow::Result<FacetCondition>
) -> anyhow::Result<Self>
{
PREC_CLIMBER.climb(
expression,
|pair: Pair<Rule>| match pair.as_rule() {
Rule::between => Ok(FacetCondition::between(fim, ff, pair)?),
Rule::eq => Ok(FacetCondition::equal(fim, ff, pair)?),
Rule::neq => Ok(Not(Box::new(FacetCondition::equal(fim, ff, pair)?))),
Rule::greater => Ok(FacetCondition::greater_than(fim, ff, pair)?),
Rule::geq => Ok(FacetCondition::greater_than_or_equal(fim, ff, pair)?),
Rule::less => Ok(FacetCondition::lower_than(fim, ff, pair)?),
Rule::leq => Ok(FacetCondition::lower_than_or_equal(fim, ff, pair)?),
Rule::greater => Ok(Self::greater_than(fim, ff, pair)?),
Rule::geq => Ok(Self::greater_than_or_equal(fim, ff, pair)?),
Rule::eq => Ok(Self::equal(fim, ff, pair)?),
Rule::neq => Ok(Self::equal(fim, ff, pair)?.negate()),
Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?),
Rule::less => Ok(Self::lower_than(fim, ff, pair)?),
Rule::between => Ok(Self::between(fim, ff, pair)?),
Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()),
Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()),
Rule::term => Self::from_pairs(fim, ff, pair.into_inner()),
Rule::not => Ok(Not(Box::new(Self::from_pairs(fim, ff, pair.into_inner())?))),
_ => unreachable!(),
},
|lhs: anyhow::Result<FacetCondition>, op: Pair<Rule>, rhs: anyhow::Result<FacetCondition>| {
|lhs: anyhow::Result<Self>, op: Pair<Rule>, rhs: anyhow::Result<Self>| {
match op.as_rule() {
Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))),
Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))),
@ -133,6 +159,22 @@ impl FacetCondition {
)
}
fn negate(self) -> FacetCondition {
match self {
OperatorI64(fid, op) => match op.negate() {
(op, None) => OperatorI64(fid, op),
(a, Some(b)) => Or(Box::new(OperatorI64(fid, a)), Box::new(OperatorI64(fid, b))),
},
OperatorF64(fid, op) => match op.negate() {
(op, None) => OperatorF64(fid, op),
(a, Some(b)) => Or(Box::new(OperatorF64(fid, a)), Box::new(OperatorF64(fid, b))),
},
OperatorString(fid, op) => OperatorString(fid, op.negate()),
Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())),
And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())),
}
}
fn between(
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashMap<u8, FacetType>,
@ -381,6 +423,7 @@ impl FacetCondition {
fn evaluate_number_operator<'t, T: 't, KC>(
rtxn: &'t heed::RoTxn,
index: &Index,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8,
operator: FacetNumberOperator<T>,
@ -396,9 +439,14 @@ impl FacetCondition {
let (left, right) = match operator {
GreaterThan(val) => (Excluded(val), Included(T::max_value())),
GreaterThanOrEqual(val) => (Included(val), Included(T::max_value())),
Equal(val) => (Included(val), Included(val)),
NotEqual(val) => {
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
let docids = Self::evaluate_number_operator::<T, KC>(rtxn, index, db, field_id, Equal(val))?;
return Ok(all_documents_ids - docids);
},
LowerThan(val) => (Included(T::min_value()), Excluded(val)),
LowerThanOrEqual(val) => (Included(T::min_value()), Included(val)),
Equal(val) => (Included(val), Included(val)),
Between(left, right) => (Included(left), Included(right)),
};
@ -421,6 +469,7 @@ impl FacetCondition {
fn evaluate_string_operator(
rtxn: &heed::RoTxn,
index: &Index,
db: heed::Database<FacetValueStringCodec, CboRoaringBitmapCodec>,
field_id: u8,
operator: &FacetStringOperator,
@ -432,7 +481,13 @@ impl FacetCondition {
Some(docids) => Ok(docids),
None => Ok(RoaringBitmap::new())
}
}
},
FacetStringOperator::NotEqual(string) => {
let all_documents_ids = index.faceted_documents_ids(rtxn, field_id)?;
let op = FacetStringOperator::Equal(string.clone());
let docids = Self::evaluate_string_operator(rtxn, index, db, field_id, &op)?;
return Ok(all_documents_ids - docids);
},
}
}
@ -445,14 +500,14 @@ impl FacetCondition {
let db = index.facet_field_id_value_docids;
match self {
OperatorI64(fid, op) => {
Self::evaluate_number_operator::<i64, FacetLevelValueI64Codec>(rtxn, db, *fid, *op)
Self::evaluate_number_operator::<i64, FacetLevelValueI64Codec>(rtxn, index, db, *fid, *op)
},
OperatorF64(fid, op) => {
Self::evaluate_number_operator::<f64, FacetLevelValueF64Codec>(rtxn, db, *fid, *op)
Self::evaluate_number_operator::<f64, FacetLevelValueF64Codec>(rtxn, index, db, *fid, *op)
},
OperatorString(fid, op) => {
let db = db.remap_key_type::<FacetValueStringCodec>();
Self::evaluate_string_operator(rtxn, db, *fid, op)
Self::evaluate_string_operator(rtxn, index, db, *fid, op)
},
Or(lhs, rhs) => {
let lhs = lhs.evaluate(rtxn, index)?;
@ -464,13 +519,6 @@ impl FacetCondition {
let rhs = rhs.evaluate(rtxn, index)?;
Ok(lhs & rhs)
},
Not(op) => {
// TODO is this right or is this wrong? because all documents ids are not faceted
// so doing that can return documents that are not faceted at all.
let all_documents_ids = index.documents_ids(rtxn)?;
let documents_ids = op.evaluate(rtxn, index)?;
Ok(all_documents_ids - documents_ids)
},
}
}
}

View File

@ -24,12 +24,18 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
// We retrieve the number of documents ids that we are deleting.
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
// We clean some of the main engine datastructures.
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
// We clean all the faceted documents ids.
for (field_id, _) in faceted_fields {
self.index.put_faceted_documents_ids(self.wtxn, field_id, &RoaringBitmap::default())?;
}
// Clear the other databases.
word_docids.clear(self.wtxn)?;
docid_word_positions.clear(self.wtxn)?;

View File

@ -184,6 +184,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
drop(iter);
// Remove the documents ids from the faceted documents ids.
let faceted_fields = self.index.faceted_fields(self.wtxn)?;
for (field_id, _) in faceted_fields {
let mut docids = self.index.faceted_documents_ids(self.wtxn, field_id)?;
docids.difference_with(&self.documents_ids);
self.index.put_faceted_documents_ids(self.wtxn, field_id, &docids)?;
}
// We delete the documents ids that are under the facet field id values.
let mut iter = facet_field_id_value_docids.iter_mut(self.wtxn)?;
while let Some(result) = iter.next() {

View File

@ -24,7 +24,7 @@ pub enum EasingName {
Linear,
}
pub struct FacetLevels<'t, 'u, 'i> {
pub struct Facets<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
pub(crate) chunk_compression_type: CompressionType,
@ -35,9 +35,9 @@ pub struct FacetLevels<'t, 'u, 'i> {
easing_function: EasingName,
}
impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> FacetLevels<'t, 'u, 'i> {
FacetLevels {
impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> Facets<'t, 'u, 'i> {
Facets {
wtxn,
index,
chunk_compression_type: CompressionType::None,
@ -70,7 +70,7 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
for (field_id, facet_type) in faceted_fields {
let content = match facet_type {
let (content, documents_ids) = match facet_type {
FacetType::Integer => {
clear_field_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
@ -78,7 +78,13 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
field_id,
)?;
compute_facet_levels::<i64, FacetLevelValueI64Codec>(
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<i64, FacetLevelValueI64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
@ -88,7 +94,9 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
self.number_of_levels,
self.easing_function,
field_id,
)?
)?;
(Some(content), documents_ids)
},
FacetType::Float => {
clear_field_levels::<f64, FacetLevelValueF64Codec>(
@ -97,7 +105,13 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
field_id,
)?;
compute_facet_levels::<f64, FacetLevelValueF64Codec>(
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
let content = compute_facet_levels::<f64, FacetLevelValueF64Codec>(
self.wtxn,
self.index.facet_field_id_value_docids,
self.chunk_compression_type,
@ -107,11 +121,22 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
self.number_of_levels,
self.easing_function,
field_id,
)?
)?;
(Some(content), documents_ids)
},
FacetType::String => {
let documents_ids = compute_faceted_documents_ids(
self.wtxn,
self.index.facet_field_id_value_docids,
field_id,
)?;
(None, documents_ids)
},
FacetType::String => continue,
};
if let Some(content) = content {
write_into_lmdb_database(
self.wtxn,
*self.index.facet_field_id_value_docids.as_polymorph(),
@ -121,6 +146,9 @@ impl<'t, 'u, 'i> FacetLevels<'t, 'u, 'i> {
)?;
}
self.index.put_faceted_documents_ids(self.wtxn, field_id, &documents_ids)?;
}
Ok(())
}
}
@ -138,9 +166,7 @@ where
let left = (field_id, 1, T::min_value(), T::min_value());
let right = (field_id, u8::MAX, T::max_value(), T::max_value());
let range = left..=right;
db.remap_key_type::<KC>()
.delete_range(wtxn, &range)
.map(drop)
db.remap_key_type::<KC>().delete_range(wtxn, &range).map(drop)
}
fn compute_facet_levels<'t, T: 't, KC>(
@ -217,6 +243,20 @@ where
writer_into_reader(writer, shrink_size)
}
fn compute_faceted_documents_ids(
rtxn: &heed::RoTxn,
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
field_id: u8,
) -> anyhow::Result<RoaringBitmap>
{
let mut documents_ids = RoaringBitmap::new();
for result in db.prefix_iter(rtxn, &[field_id])? {
let (_key, docids) = result?;
documents_ids.union_with(&docids);
}
Ok(documents_ids)
}
fn write_entry<T, KC>(
writer: &mut Writer<File>,
field_id: u8,

View File

@ -16,7 +16,7 @@ use rayon::prelude::*;
use rayon::ThreadPool;
use crate::index::Index;
use crate::update::{FacetLevels, UpdateIndexingStep};
use crate::update::{Facets, UpdateIndexingStep};
use self::store::{Store, Readers};
use self::merge_function::{
main_merge, word_docids_merge, words_pairs_proximities_docids_merge,
@ -584,7 +584,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
});
}
let mut builder = FacetLevels::new(self.wtxn, self.index);
let mut builder = Facets::new(self.wtxn, self.index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;

View File

@ -1,7 +1,7 @@
mod available_documents_ids;
mod clear_documents;
mod delete_documents;
mod facet_levels;
mod facets;
mod index_documents;
mod settings;
mod update_builder;
@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::DeleteDocuments;
pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
pub use self::facet_levels::{FacetLevels, EasingName};
pub use self::facets::{Facets, EasingName};
pub use self::settings::Settings;
pub use self::update_builder::UpdateBuilder;
pub use self::update_step::UpdateIndexingStep;

View File

@ -2,7 +2,7 @@ use grenad::CompressionType;
use rayon::ThreadPool;
use crate::Index;
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, FacetLevels};
use super::{ClearDocuments, DeleteDocuments, IndexDocuments, Settings, Facets};
pub struct UpdateBuilder<'a> {
pub(crate) log_every_n: Option<usize>,
@ -119,13 +119,13 @@ impl<'a> UpdateBuilder<'a> {
builder
}
pub fn facet_levels<'t, 'u, 'i>(
pub fn facets<'t, 'u, 'i>(
self,
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> FacetLevels<'t, 'u, 'i>
) -> Facets<'t, 'u, 'i>
{
let mut builder = FacetLevels::new(wtxn, index);
let mut builder = Facets::new(wtxn, index);
builder.chunk_compression_type = self.chunk_compression_type;
builder.chunk_compression_level = self.chunk_compression_level;