First implementation of the CONTAINS filter

This commit is contained in:
Kerollmops 2023-05-16 10:32:28 +02:00
parent bdcee66f9a
commit 3eb31dbf3f
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 37 additions and 8 deletions

1
Cargo.lock generated
View File

@ -2738,6 +2738,7 @@ dependencies = [
"logging_timer",
"maplit",
"md5",
"memchr",
"memmap2",
"mimalloc",
"obkv",

View File

@ -25,10 +25,16 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = [
"lmdb",
"sync-read-txn",
] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.5.10"
obkv = "0.2.0"
once_cell = "1.17.1"
@ -39,12 +45,17 @@ rstar = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.10.0"
smartstring = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.3.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
@ -63,13 +74,13 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
rand = {version = "0.8.5", features = ["small_rng"] }
rand = { version = "0.8.5", features = ["small_rng"] }
[target.'cfg(fuzzing)'.dev-dependencies]
fuzzcheck = "0.12.1"
[features]
all-tokenizations = [ "charabia/default" ]
all-tokenizations = ["charabia/default"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@ -4,13 +4,15 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use heed::LazyDecode;
use memchr::memmem::Finder;
use roaring::RoaringBitmap;
use serde_json::Value;
use super::facet_range_search;
use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
@ -299,7 +301,22 @@ impl<'a> Filter<'a> {
return Ok(all_ids - docids);
}
Condition::Contains(val) => {
todo!()
let finder = Finder::new(val.value());
let base = FacetGroupKey { field_id, level: 0, left_bound: "" };
// TODO use the roaring::MultiOps trait
let mut docids = RoaringBitmap::new();
for result in strings_db
.prefix_iter(rtxn, &base)?
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
{
let (FacetGroupKey { left_bound, .. }, lazy_group_value) = result?;
if finder.find(left_bound.as_bytes()).is_some() {
let FacetGroupValue { bitmap, .. } = lazy_group_value.decode()?;
docids |= bitmap;
}
}
return Ok(docids);
}
Condition::StartsWith(val) => {
todo!()