diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 6d5044d20..0fe9a5a53 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -85,6 +85,9 @@ impl SearchQueue { }, search_request = receive_new_searches.recv() => { + if search_request.is_none() { + continue; + } // this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web let search_request = search_request.unwrap(); if searches_running < usize::from(parallelism) && queue.is_empty() { diff --git a/milli/src/index.rs b/milli/src/index.rs index accfff719..49f78f3cd 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2722,8 +2722,8 @@ pub(crate) mod tests { // We should find tamo first insta::assert_debug_snapshot!(results.documents_ids, @r###" [ - 0, 1, + 0, ] "###); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 90d971fa3..9a2ff5b02 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -76,7 +76,7 @@ impl<'ctx> SearchContext<'ctx> { let mut exact = Vec::new(); let mut tolerant = Vec::new(); - for (name, fid, weight) in searchable_fids { + for (_name, fid, weight) in searchable_fids { if exact_attributes_ids.contains(&fid) { exact.push((fid, weight)); } else { @@ -96,22 +96,26 @@ impl<'ctx> SearchContext<'ctx> { }) } - // TODO: TAMO continue here pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> { - if attributes_to_search_on.contains(&String::from("*")) { - return Ok(()); - } - - let fids_map = self.index.fields_ids_map(self.txn)?; + let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?; let searchable_names = self.index.searchable_fields_and_weights(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; + let mut wildcard = false; + let mut restricted_fids = SearchableFids::default(); for field_name in attributes_to_search_on { + if field_name == "*" { + wildcard = true; + // we cannot early exit as we want to returns error in case of unknown fields + continue; + } let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name); let (fid, weight) = match searchable_weight { // The Field id exist and the field is searchable Some((_name, fid, weight)) => (*fid, *weight), + // The field is not searchable but the user didn't define any searchable attributes + None if user_defined_searchable.is_none() => continue, // The field is not searchable => User error None => { let (valid_fields, hidden_fields) = self.index.remove_hidden_fields( @@ -136,7 +140,16 @@ impl<'ctx> SearchContext<'ctx> { }; } - self.searchable_fids = restricted_fids; + if wildcard { + let (exact, tolerant) = searchable_names + .iter() + .map(|(_name, fid, weight)| (*fid, *weight)) + .partition(|(fid, _weight)| exact_attributes_ids.contains(fid)); + + self.searchable_fids = SearchableFids { tolerant, exact }; + } else { + self.searchable_fids = restricted_fids; + } Ok(()) } diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index cf65249de..e10f2fbab 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; use crate::search::new::SearchContext; -use crate::Result; +use crate::{FieldId, Result}; #[derive(Clone, PartialEq, Eq, Hash)] pub struct FidCondition { term: LocatedQueryTermSubset, - fid: u16, + fid: Option, } pub enum FidGraph {} @@ -26,13 +26,16 @@ impl RankingRuleGraphTrait for FidGraph { universe: &RoaringBitmap, ) -> Result { let FidCondition { term, .. } = condition; - // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument - let mut docids = compute_query_term_subset_docids_within_field_id( - ctx, - &term.term_subset, - condition.fid, - )?; - docids &= universe; + + let docids = if let Some(fid) = condition.fid { + // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument + let mut docids = + compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; + docids &= universe; + docids + } else { + RoaringBitmap::new() + }; Ok(ComputedCondition { docids, @@ -68,24 +71,27 @@ impl RankingRuleGraphTrait for FidGraph { all_fields.extend(fields); } + let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?; + let mut edges = vec![]; for fid in all_fields.iter().copied() { + let weight = weights_map.weight(fid).unwrap(); edges.push(( - fid as u32 * term.term_ids.len() as u32, - conditions_interner.insert(FidCondition { term: term.clone(), fid }), + weight as u32 * term.term_ids.len() as u32, + conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), )); } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_fid: Option = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max(); + let max_weight: Option = weights_map.max_weight(); - if let Some(max_fid) = max_fid { - if !all_fields.contains(&max_fid) { + if let Some(max_weight) = max_weight { + if !all_fields.contains(&max_weight) { edges.push(( - max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. + max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. conditions_interner.insert(FidCondition { term: term.clone(), // TODO remove this ugly clone - fid: max_fid, + fid: None, }), )); } diff --git a/milli/src/search/new/tests/attribute_fid.rs b/milli/src/search/new/tests/attribute_fid.rs index 61b0a743b..c595887ba 100644 --- a/milli/src/search/new/tests/attribute_fid.rs +++ b/milli/src/search/new/tests/attribute_fid.rs @@ -132,17 +132,17 @@ fn test_attribute_fid_simple() { fn test_attribute_fid_ngrams() { let index = create_index(); db_snap!(index, fields_ids_map, @r###" - 0 title | - 1 description | - 2 plot | - 3 id | + 0 id | + 1 title | + 2 description | + 3 plot | "###); db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###); db_snap!(index, fieldids_weights_map, @r###" fid weight - 0 0 | - 1 1 | - 2 2 | + 1 0 | + 2 1 | + 3 2 | "###); let txn = index.read_txn().unwrap(); diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap new file mode 100644 index 000000000..930a21626 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_fid.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Fid( + Rank { + rank: 19, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 15, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 14, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 13, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 12, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 83, + max_rank: 91, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 11, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 73, + max_rank: 91, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 7, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 10, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 78, + max_rank: 91, + }, + ), + ], + ), + ( + 14, + [ + Fid( + Rank { + rank: 5, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 1, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), +] diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 6875e6f47..2e8ac157c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -12,7 +12,6 @@ use time::OffsetDateTime; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; use crate::criterion::Criterion; -use crate::documents::FieldIdMapper; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::order_by_map::OrderByMap; @@ -1562,8 +1561,9 @@ mod tests { // we must find the appropriate document. let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); assert_eq!(documents.len(), 1); - assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); drop(rtxn); // We change the searchable fields to be the "name" field only. @@ -1575,12 +1575,16 @@ mod tests { // Check that the searchable field have been reset and documents are found now. let rtxn = index.read_txn().unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); + // the searchable fields should contain all the fields let searchable_fields = index.searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###); + snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); let result = index.search(&rtxn).query("23").execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); } #[test]