mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Fix the indexing of the searchable
This commit is contained in:
parent
4e4a1ddff7
commit
685f452fb2
@ -48,7 +48,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let mut ctx = SearchContext::new(&index, &txn)?;
|
||||
let universe = filtered_universe(&ctx, &None)?;
|
||||
|
||||
let docs = execute_search(
|
||||
|
@ -25,4 +25,8 @@ impl FieldidsWeightsMap {
|
||||
pub fn max_weight(&self) -> Option<Weight> {
|
||||
self.map.values().copied().max()
|
||||
}
|
||||
|
||||
pub fn ids<'a>(&'a self) -> impl Iterator<Item = FieldId> + 'a {
|
||||
self.map.keys().copied()
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ use crate::{
|
||||
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
|
||||
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap,
|
||||
GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec,
|
||||
BEU16, BEU32, BEU64,
|
||||
Weight, BEU16, BEU32, BEU64,
|
||||
};
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
@ -443,6 +443,27 @@ impl Index {
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn searchable_fields_and_weights<'a>(
|
||||
&self,
|
||||
rtxn: &'a RoTxn,
|
||||
) -> heed::Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
|
||||
let fid_map = self.fields_ids_map(rtxn)?;
|
||||
let weight_map = self.fieldids_weights_map(rtxn)?;
|
||||
let searchable = self.searchable_fields(rtxn)?;
|
||||
|
||||
Ok(searchable
|
||||
.into_iter()
|
||||
.map(|field| {
|
||||
// the searchable attributes are a subset of the field id map
|
||||
let fid = fid_map.id(&field).unwrap();
|
||||
// all the searchable fields have a weight
|
||||
let weight = weight_map.weight(fid).unwrap();
|
||||
|
||||
(field, fid, weight)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/* geo rtree */
|
||||
|
||||
/// Writes the provided `rtree` which associates coordinates to documents ids.
|
||||
@ -605,9 +626,25 @@ impl Index {
|
||||
pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
user_fields: &[&str],
|
||||
user_fields: Option<&[&str]>,
|
||||
fields_ids_map: &FieldsIdsMap,
|
||||
) -> Result<()> {
|
||||
// Special case if there is no user defined fields.
|
||||
// Then the whole field id map is marked as searchable.
|
||||
if user_fields.is_none() {
|
||||
let mut weights = self.fieldids_weights_map(&wtxn)?;
|
||||
let mut searchable = Vec::new();
|
||||
for (weight, (fid, name)) in fields_ids_map.iter().enumerate() {
|
||||
searchable.push(name);
|
||||
weights.insert(fid, weight as u16);
|
||||
}
|
||||
self.put_searchable_fields(wtxn, &searchable)?;
|
||||
self.put_fieldids_weights_map(wtxn, &weights)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let user_fields = user_fields.unwrap();
|
||||
|
||||
// We can write the user defined searchable fields as-is.
|
||||
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
|
||||
|
||||
@ -617,13 +654,13 @@ impl Index {
|
||||
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
|
||||
// 2. Iterate over the user defined searchable fields.
|
||||
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
|
||||
// (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
|
||||
let mut real_fields = user_fields.to_vec();
|
||||
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
|
||||
let mut real_fields = Vec::new();
|
||||
|
||||
for (id, field_from_map) in fields_ids_map.iter() {
|
||||
for (weight, user_field) in user_fields.iter().enumerate() {
|
||||
if crate::is_faceted_by(field_from_map, user_field)
|
||||
&& !user_fields.contains(&field_from_map)
|
||||
&& !real_fields.contains(&field_from_map)
|
||||
{
|
||||
real_fields.push(field_from_map);
|
||||
|
||||
@ -2427,6 +2464,14 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
"###);
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
@ -2442,6 +2487,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 1 |
|
||||
"###);
|
||||
|
||||
index.delete_documents(Default::default());
|
||||
|
||||
@ -2452,6 +2507,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 1 |
|
||||
"###);
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
@ -2467,6 +2532,16 @@ pub(crate) mod tests {
|
||||
11 0
|
||||
4 1
|
||||
"###);
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 primary_key |
|
||||
1 a |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 1 |
|
||||
"###);
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let search = Search::new(&rtxn, &index);
|
||||
|
@ -147,7 +147,7 @@ impl<'a> Search<'a> {
|
||||
|
||||
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
|
||||
if has_vector_search {
|
||||
let ctx = SearchContext::new(self.index, self.rtxn);
|
||||
let ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||
filtered_universe(&ctx, &self.filter)
|
||||
} else {
|
||||
Ok(self.execute()?.candidates)
|
||||
@ -155,7 +155,7 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<SearchResult> {
|
||||
let mut ctx = SearchContext::new(self.index, self.rtxn);
|
||||
let mut ctx = SearchContext::new(self.index, self.rtxn)?;
|
||||
|
||||
if let Some(searchable_attributes) = self.searchable_attributes {
|
||||
ctx.searchable_attributes(searchable_attributes)?;
|
||||
|
@ -159,58 +159,36 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
|
||||
/// Retrieve or insert the given value in the `word_docids` database.
|
||||
fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
|
||||
match &self.restricted_fids {
|
||||
Some(restricted_fids) => {
|
||||
let interned = self.word_interner.get(word).as_str();
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
|
||||
let interned = self.word_interner.get(word).as_str();
|
||||
let keys: Vec<_> =
|
||||
self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
|
||||
fn get_db_exact_word_docids(
|
||||
&mut self,
|
||||
word: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
match &self.restricted_fids {
|
||||
Some(restricted_fids) => {
|
||||
let interned = self.word_interner.get(word).as_str();
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
|
||||
let interned = self.word_interner.get(word).as_str();
|
||||
let keys: Vec<_> =
|
||||
self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
self.word_interner.get(word).as_str(),
|
||||
&mut self.db_cache.exact_word_docids,
|
||||
self.index.exact_word_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
word,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_docids,
|
||||
self.index.word_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
|
||||
@ -238,58 +216,36 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
&mut self,
|
||||
prefix: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
match &self.restricted_fids {
|
||||
Some(restricted_fids) => {
|
||||
let interned = self.word_interner.get(prefix).as_str();
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
|
||||
let interned = self.word_interner.get(prefix).as_str();
|
||||
let keys: Vec<_> =
|
||||
self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
|
||||
fn get_db_exact_word_prefix_docids(
|
||||
&mut self,
|
||||
prefix: Interned<String>,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
match &self.restricted_fids {
|
||||
Some(restricted_fids) => {
|
||||
let interned = self.word_interner.get(prefix).as_str();
|
||||
let keys: Vec<_> =
|
||||
restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
|
||||
let interned = self.word_interner.get(prefix).as_str();
|
||||
let keys: Vec<_> =
|
||||
self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect();
|
||||
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
self.word_interner.get(prefix).as_str(),
|
||||
&mut self.db_cache.exact_word_prefix_docids,
|
||||
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
|
||||
),
|
||||
}
|
||||
DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
|
||||
self.txn,
|
||||
prefix,
|
||||
&keys[..],
|
||||
&mut self.db_cache.exact_word_prefix_docids,
|
||||
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
|
||||
merge_cbo_roaring_bitmaps,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn get_db_word_pair_proximity_docids(
|
||||
@ -465,8 +421,8 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word: Interned<String>,
|
||||
fid: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
// if the requested fid isn't in the restricted list, return None.
|
||||
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||
// if the requested fid isn't in the list of searchable, return None.
|
||||
if !self.searchable_fids.contains(&fid) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
@ -484,8 +440,8 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
word_prefix: Interned<String>,
|
||||
fid: u16,
|
||||
) -> Result<Option<RoaringBitmap>> {
|
||||
// if the requested fid isn't in the restricted list, return None.
|
||||
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
|
||||
// if the requested fid isn't in the searchable list, return None.
|
||||
if !self.searchable_fids.contains(&fid) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
|
@ -258,7 +258,7 @@ pub(crate) mod tests {
|
||||
fn matching_words() {
|
||||
let temp_index = temp_index_with_documents();
|
||||
let rtxn = temp_index.read_txn().unwrap();
|
||||
let mut ctx = SearchContext::new(&temp_index, &rtxn);
|
||||
let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
|
||||
let mut builder = TokenizerBuilder::default();
|
||||
let tokenizer = builder.build();
|
||||
let tokens = tokenizer.tokenize("split this world");
|
||||
|
@ -506,7 +506,7 @@ mod tests {
|
||||
|
||||
impl<'a> MatcherBuilder<'a> {
|
||||
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
|
||||
let mut ctx = SearchContext::new(index, rtxn);
|
||||
let mut ctx = SearchContext::new(index, rtxn).unwrap();
|
||||
let universe = filtered_universe(&ctx, &None).unwrap();
|
||||
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
|
||||
&mut ctx,
|
||||
|
@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use self::vector_sort::VectorSort;
|
||||
use crate::error::FieldIdMapMissingEntry;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
use crate::vector::Embedder;
|
||||
use crate::{
|
||||
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
|
||||
UserError,
|
||||
UserError, Weight,
|
||||
};
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
@ -67,12 +66,25 @@ pub struct SearchContext<'ctx> {
|
||||
pub phrase_interner: DedupInterner<Phrase>,
|
||||
pub term_interner: Interner<QueryTerm>,
|
||||
pub phrase_docids: PhraseDocIdsCache,
|
||||
pub restricted_fids: Option<RestrictedFids>,
|
||||
pub searchable_fids: SearchableFids,
|
||||
}
|
||||
|
||||
impl<'ctx> SearchContext<'ctx> {
|
||||
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
|
||||
Self {
|
||||
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
|
||||
let searchable_fids = index.searchable_fields_and_weights(txn)?;
|
||||
let exact_attributes_ids = index.exact_attributes_ids(txn)?;
|
||||
|
||||
let mut exact = Vec::new();
|
||||
let mut tolerant = Vec::new();
|
||||
for (name, fid, weight) in searchable_fids {
|
||||
if exact_attributes_ids.contains(&fid) {
|
||||
exact.push((fid, weight));
|
||||
} else {
|
||||
tolerant.push((fid, weight));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
index,
|
||||
txn,
|
||||
db_cache: <_>::default(),
|
||||
@ -80,38 +92,32 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
phrase_interner: <_>::default(),
|
||||
term_interner: <_>::default(),
|
||||
phrase_docids: <_>::default(),
|
||||
restricted_fids: None,
|
||||
}
|
||||
searchable_fids: SearchableFids { tolerant, exact },
|
||||
})
|
||||
}
|
||||
|
||||
pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
|
||||
// TODO: TAMO continue here
|
||||
pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
|
||||
if attributes_to_search_on.contains(&String::from("*")) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let fids_map = self.index.fields_ids_map(self.txn)?;
|
||||
let searchable_names = self.index.searchable_fields(self.txn)?;
|
||||
let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
|
||||
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
|
||||
|
||||
let mut restricted_fids = RestrictedFids::default();
|
||||
let mut contains_wildcard = false;
|
||||
for field_name in searchable_attributes {
|
||||
if field_name == "*" {
|
||||
contains_wildcard = true;
|
||||
continue;
|
||||
}
|
||||
let searchable_contains_name = searchable_names.iter().any(|name| name == field_name);
|
||||
let fid = match (fids_map.id(field_name), searchable_contains_name) {
|
||||
let mut restricted_fids = SearchableFids::default();
|
||||
for field_name in attributes_to_search_on {
|
||||
let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
|
||||
let (fid, weight) = match searchable_weight {
|
||||
// The Field id exist and the field is searchable
|
||||
(Some(fid), true) => fid,
|
||||
// The field is searchable but the Field id doesn't exist => Internal Error
|
||||
(None, true) => {
|
||||
return Err(FieldIdMapMissingEntry::FieldName {
|
||||
field_name: field_name.to_string(),
|
||||
process: "search",
|
||||
}
|
||||
.into())
|
||||
}
|
||||
Some((_name, fid, weight)) => (*fid, *weight),
|
||||
// The field is not searchable => User error
|
||||
(_fid, false) => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
self.index.remove_hidden_fields(self.txn, searchable_names)?;
|
||||
None => {
|
||||
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
|
||||
self.txn,
|
||||
searchable_names.iter().map(|(name, _, _)| name),
|
||||
)?;
|
||||
|
||||
let field = field_name.to_string();
|
||||
return Err(UserError::InvalidSearchableAttribute {
|
||||
@ -124,13 +130,13 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
};
|
||||
|
||||
if exact_attributes_ids.contains(&fid) {
|
||||
restricted_fids.exact.push(fid);
|
||||
restricted_fids.exact.push((fid, weight));
|
||||
} else {
|
||||
restricted_fids.tolerant.push(fid);
|
||||
restricted_fids.tolerant.push((fid, weight));
|
||||
};
|
||||
}
|
||||
|
||||
self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
|
||||
self.searchable_fids = restricted_fids;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -152,14 +158,15 @@ impl Word {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct RestrictedFids {
|
||||
pub tolerant: Vec<FieldId>,
|
||||
pub exact: Vec<FieldId>,
|
||||
pub struct SearchableFids {
|
||||
pub tolerant: Vec<(FieldId, Weight)>,
|
||||
pub exact: Vec<(FieldId, Weight)>,
|
||||
}
|
||||
|
||||
impl RestrictedFids {
|
||||
impl SearchableFids {
|
||||
pub fn contains(&self, fid: &FieldId) -> bool {
|
||||
self.tolerant.contains(fid) || self.exact.contains(fid)
|
||||
self.tolerant.iter().find(|(id, _)| id == fid).is_some()
|
||||
|| self.exact.iter().find(|(id, _)| id == fid).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -366,7 +366,7 @@ mod tests {
|
||||
let tokens = tokenizer.tokenize(".");
|
||||
let index = temp_index_with_documents();
|
||||
let rtxn = index.read_txn()?;
|
||||
let mut ctx = SearchContext::new(&index, &rtxn);
|
||||
let mut ctx = SearchContext::new(&index, &rtxn)?;
|
||||
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
|
||||
let ExtractedTokens { query_terms, .. } =
|
||||
located_query_terms_from_tokens(&mut ctx, tokens, None)?;
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -131,6 +131,19 @@ fn test_attribute_fid_simple() {
|
||||
#[test]
|
||||
fn test_attribute_fid_ngrams() {
|
||||
let index = create_index();
|
||||
db_snap!(index, fields_ids_map, @r###"
|
||||
0 title |
|
||||
1 description |
|
||||
2 plot |
|
||||
3 id |
|
||||
"###);
|
||||
db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
|
||||
db_snap!(index, fieldids_weights_map, @r###"
|
||||
fid weight
|
||||
0 0 |
|
||||
1 1 |
|
||||
2 2 |
|
||||
"###);
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
|
@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snap_fieldids_weights_map(index: &Index) -> String {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
|
||||
|
||||
let mut snap = String::new();
|
||||
writeln!(&mut snap, "fid weight").unwrap();
|
||||
let mut field_ids: Vec<_> = weights_map.ids().collect();
|
||||
field_ids.sort();
|
||||
for field_id in field_ids {
|
||||
let weight = weights_map.weight(field_id).unwrap();
|
||||
writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snap_searchable_fields(index: &Index) -> String {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let searchable_fields = index.searchable_fields(&rtxn).unwrap();
|
||||
format!("{searchable_fields:?}")
|
||||
}
|
||||
pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
|
||||
@ -469,6 +488,12 @@ macro_rules! full_snap_of_db {
|
||||
($index:ident, fields_ids_map) => {{
|
||||
$crate::snapshot_tests::snap_fields_ids_map(&$index)
|
||||
}};
|
||||
($index:ident, fieldids_weights_map) => {{
|
||||
$crate::snapshot_tests::snap_fieldids_weights_map(&$index)
|
||||
}};
|
||||
($index:ident, searchable_fields) => {{
|
||||
$crate::snapshot_tests::snap_searchable_fields(&$index)
|
||||
}};
|
||||
($index:ident, geo_faceted_documents_ids) => {{
|
||||
$crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
|
||||
}};
|
||||
|
@ -496,7 +496,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
|
||||
self.index.put_all_searchable_fields_from_fields_ids_map(
|
||||
self.wtxn,
|
||||
&names,
|
||||
Some(&names),
|
||||
&new_fields_ids_map,
|
||||
)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
|
||||
@ -1228,18 +1228,19 @@ impl InnerIndexSettings {
|
||||
|
||||
// find and insert the new field ids
|
||||
pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
|
||||
let searchable_fields = self
|
||||
.user_defined_searchable_fields
|
||||
.as_ref()
|
||||
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
|
||||
|
||||
// in case new fields were introduced we're going to recreate the searchable fields.
|
||||
if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
|
||||
let searchable_fields =
|
||||
searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
|
||||
index.put_all_searchable_fields_from_fields_ids_map(
|
||||
wtxn,
|
||||
&searchable_fields,
|
||||
&self.fields_ids_map,
|
||||
)?;
|
||||
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||
self.searchable_fields_ids = searchable_fields_ids;
|
||||
}
|
||||
index.put_all_searchable_fields_from_fields_ids_map(
|
||||
wtxn,
|
||||
searchable_fields.as_deref(),
|
||||
&self.fields_ids_map,
|
||||
)?;
|
||||
let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
|
||||
self.searchable_fields_ids = searchable_fields_ids;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user