4254: Bring back v1.5.1 changes into main r=ManyTheFish a=Kerollmops

This pull request brings back changes from the _release-v1.5.1_ branch into _main_.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2023-12-14 09:41:57 +00:00 committed by GitHub
commit e0f712b9d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 121 additions and 36 deletions

30
Cargo.lock generated
View File

@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
@ -1269,7 +1269,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@ -1486,7 +1486,7 @@ dependencies = [
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"faux", "faux",
"tempfile", "tempfile",
@ -1508,7 +1508,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"insta", "insta",
"nom", "nom",
@ -1539,7 +1539,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@ -1657,7 +1657,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"clap", "clap",
@ -2457,7 +2457,7 @@ dependencies = [
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@ -2654,7 +2654,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@ -3083,7 +3083,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"insta", "insta",
"md5", "md5",
@ -3092,7 +3092,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@ -3182,7 +3182,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"base64 0.21.2", "base64 0.21.2",
"enum-iterator", "enum-iterator",
@ -3201,7 +3201,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@ -3231,7 +3231,7 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@ -3269,7 +3269,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"big_s", "big_s",
"bimap", "bimap",
@ -3592,7 +3592,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.5.0" version = "1.5.1"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",

View File

@ -19,7 +19,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.5.0" version = "1.5.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"] authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server" description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com" homepage = "https://meilisearch.com"

View File

@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() {
}) })
.await; .await;
} }
/// Checks that restricting a search to an attribute listed in the typo
/// tolerance `disableOnAttributes` setting only returns the documents that
/// match the query on that attribute without any typo.
#[actix_rt::test]
async fn search_on_exact_field() {
    let server = Server::new().await;

    // "Marvel" appears verbatim in the `exact` field of document 2 only;
    // document 1 holds the near-miss "Marivel" in that field.
    let documents = json!([
        {
            "title": "Captain Marvel",
            "exact": "Captain Marivel",
            "id": "1",
        },
        {
            "title": "Captain Marivel",
            "exact": "Captain the Marvel",
            "id": "2",
        }]);
    let index = index_with_documents(&server, &documents).await;

    // Turn typo tolerance off for the `exact` attribute.
    let typo_settings = json!({ "disableOnAttributes": ["exact"] });
    let (response, code) = index.update_settings_typo_tolerance(typo_settings).await;
    assert_eq!(202, code, "{:?}", response);
    // NOTE(review): presumably task 0 is the document addition and task 1 is
    // the settings update enqueued just above; confirm against `index_with_documents`.
    index.wait_task(1).await;

    // Searching on an exact attribute should only return the document matching without typo.
    let query = json!({"q": "Marvel", "attributesToSearchOn": ["exact"]});
    index
        .search(query, |response, code| {
            snapshot!(code, @"200 OK");
            let hits = response["hits"].as_array().unwrap();
            snapshot!(hits.len(), @"1");
        })
        .await;
}

View File

@ -162,7 +162,8 @@ impl<'ctx> SearchContext<'ctx> {
match &self.restricted_fids { match &self.restricted_fids {
Some(restricted_fids) => { Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str(); let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn, self.txn,
@ -187,13 +188,29 @@ impl<'ctx> SearchContext<'ctx> {
&mut self, &mut self,
word: Interned<String>, word: Interned<String>,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn, self.txn,
word, word,
self.word_interner.get(word).as_str(), self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids, &mut self.db_cache.exact_word_docids,
self.index.exact_word_docids.remap_data_type::<Bytes>(), self.index.exact_word_docids.remap_data_type::<Bytes>(),
) ),
}
} }
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> { pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@ -224,7 +241,8 @@ impl<'ctx> SearchContext<'ctx> {
match &self.restricted_fids { match &self.restricted_fids {
Some(restricted_fids) => { Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str(); let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> = restricted_fids.iter().map(|fid| (interned, *fid)).collect(); let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn, self.txn,
@ -249,13 +267,29 @@ impl<'ctx> SearchContext<'ctx> {
&mut self, &mut self,
prefix: Interned<String>, prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> { ) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn, self.txn,
prefix, prefix,
self.word_interner.get(prefix).as_str(), self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids, &mut self.db_cache.exact_word_prefix_docids,
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(), self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
) ),
}
} }
pub fn get_db_word_pair_proximity_docids( pub fn get_db_word_pair_proximity_docids(

View File

@ -50,7 +50,9 @@ use crate::distance::NDotProductPoint;
use crate::error::FieldIdMapMissingEntry; use crate::error::FieldIdMapMissingEntry;
use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule; use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
};
/// A structure used throughout the execution of a search query. /// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> { pub struct SearchContext<'ctx> {
@ -61,7 +63,7 @@ pub struct SearchContext<'ctx> {
pub phrase_interner: DedupInterner<Phrase>, pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>, pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache, pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<Vec<u16>>, pub restricted_fids: Option<RestrictedFids>,
} }
impl<'ctx> SearchContext<'ctx> { impl<'ctx> SearchContext<'ctx> {
@ -81,8 +83,9 @@ impl<'ctx> SearchContext<'ctx> {
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
let fids_map = self.index.fields_ids_map(self.txn)?; let fids_map = self.index.fields_ids_map(self.txn)?;
let searchable_names = self.index.searchable_fields(self.txn)?; let searchable_names = self.index.searchable_fields(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut restricted_fids = Vec::new(); let mut restricted_fids = RestrictedFids::default();
let mut contains_wildcard = false; let mut contains_wildcard = false;
for field_name in searchable_attributes { for field_name in searchable_attributes {
if field_name == "*" { if field_name == "*" {
@ -121,7 +124,11 @@ impl<'ctx> SearchContext<'ctx> {
} }
}; };
restricted_fids.push(fid); if exact_attributes_ids.contains(&fid) {
restricted_fids.exact.push(fid);
} else {
restricted_fids.tolerant.push(fid);
};
} }
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
@ -145,6 +152,18 @@ impl Word {
} }
} }
/// Field ids a search is restricted to, split into two separate lists.
#[derive(Debug, Clone, Default)]
pub struct RestrictedFids {
    /// Restricted field ids that are not part of the exact attributes.
    pub tolerant: Vec<FieldId>,
    /// Restricted field ids that are part of the exact attributes.
    pub exact: Vec<FieldId>,
}

impl RestrictedFids {
    /// Returns `true` when `fid` is present in either the `tolerant` or the
    /// `exact` list, and `false` otherwise.
    pub fn contains(&self, fid: &FieldId) -> bool {
        self.tolerant.iter().chain(&self.exact).any(|candidate| candidate == fid)
    }
}
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it. /// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
fn resolve_maximally_reduced_query_graph( fn resolve_maximally_reduced_query_graph(
ctx: &mut SearchContext, ctx: &mut SearchContext,