diff --git a/Cargo.lock b/Cargo.lock index 5f192b6d1..6de73f0f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2738,6 +2738,7 @@ dependencies = [ "logging_timer", "maplit", "md5", + "memchr", "memmap2", "mimalloc", "obkv", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index de0f4e31d..1c370f642 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -25,10 +25,16 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] } -heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] } +grenad = { version = "0.4.4", default-features = false, features = [ + "tempfile", +] } +heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = [ + "lmdb", + "sync-read-txn", +] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } +memchr = "2.5.0" memmap2 = "0.5.10" obkv = "0.2.0" once_cell = "1.17.1" @@ -39,12 +45,17 @@ rstar = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.160", features = ["derive"] } serde_json = { version = "1.0.95", features = ["preserve_order"] } slice-group-by = "0.3.0" -smallstr = { version = "0.3.0", features = ["serde"] } +smallstr = { version = "0.3.0", features = ["serde"] } smallvec = "1.10.0" smartstring = "1.0.1" tempfile = "3.5.0" thiserror = "1.0.40" -time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.20", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } uuid = { version = "1.3.1", features = ["v4"] } filter-parser = { path = "../filter-parser" } @@ -63,13 +74,13 @@ big_s = "1.0.2" insta = "1.29.0" maplit = "1.0.2" md5 = "0.7.0" -rand = {version = "0.8.5", features = ["small_rng"] } +rand = 
{ version = "0.8.5", features = ["small_rng"] } [target.'cfg(fuzzing)'.dev-dependencies] fuzzcheck = "0.12.1" [features] -all-tokenizations = [ "charabia/default" ] +all-tokenizations = ["charabia/default"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 96229dd7a..922d8b1a1 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -4,13 +4,15 @@ use std::ops::Bound::{self, Excluded, Included}; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token}; +use heed::LazyDecode; +use memchr::memmem::Finder; use roaring::RoaringBitmap; use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; @@ -299,7 +301,22 @@ impl<'a> Filter<'a> { return Ok(all_ids - docids); } Condition::Contains(val) => { - todo!() + let finder = Finder::new(val.value()); + let base = FacetGroupKey { field_id, level: 0, left_bound: "" }; + // TODO use the roaring::MultiOps trait + let mut docids = RoaringBitmap::new(); + for result in strings_db + .prefix_iter(rtxn, &base)? + .remap_data_type::<LazyDecode<FacetGroupValueCodec>>() + { + let (FacetGroupKey { left_bound, .. }, lazy_group_value) = result?; + if finder.find(left_bound.as_bytes()).is_some() { + let FacetGroupValue { bitmap, .. } = lazy_group_value.decode()?; + docids |= bitmap; + } + } + + return Ok(docids); } Condition::StartsWith(val) => { todo!()