Fix the indexing of the searchable

Tamo 2024-05-07 17:56:40 +02:00
parent 4e4a1ddff7
commit 685f452fb2
12 changed files with 235 additions and 154 deletions

View File

@@ -48,7 +48,7 @@ fn main() -> Result<(), Box<dyn Error>> {
     let start = Instant::now();

-    let mut ctx = SearchContext::new(&index, &txn);
+    let mut ctx = SearchContext::new(&index, &txn)?;
     let universe = filtered_universe(&ctx, &None)?;
     let docs = execute_search(

View File

@@ -25,4 +25,8 @@ impl FieldidsWeightsMap {
     pub fn max_weight(&self) -> Option<Weight> {
         self.map.values().copied().max()
     }
+
+    pub fn ids<'a>(&'a self) -> impl Iterator<Item = FieldId> + 'a {
+        self.map.keys().copied()
+    }
 }
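For context, FieldidsWeightsMap is never shown in full in this diff; from the methods visible here it behaves like a map from FieldId to Weight. Below is a minimal standalone sketch of that shape — the BTreeMap container and the u16 aliases are assumptions for illustration, only the method signatures come from the diff:

    use std::collections::BTreeMap;

    // Hypothetical stand-ins for milli's FieldId and Weight types.
    type FieldId = u16;
    type Weight = u16;

    // Hypothetical stand-in for milli's FieldidsWeightsMap, for illustration only.
    #[derive(Default)]
    struct FieldidsWeightsMap {
        map: BTreeMap<FieldId, Weight>,
    }

    impl FieldidsWeightsMap {
        fn insert(&mut self, fid: FieldId, weight: Weight) {
            self.map.insert(fid, weight);
        }
        fn weight(&self, fid: FieldId) -> Option<Weight> {
            self.map.get(&fid).copied()
        }
        fn max_weight(&self) -> Option<Weight> {
            self.map.values().copied().max()
        }
        // The accessor added by this commit: iterate over every known field id.
        fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
            self.map.keys().copied()
        }
    }

    fn main() {
        let mut weights = FieldidsWeightsMap::default();
        weights.insert(0, 0);
        weights.insert(1, 1);
        assert_eq!(weights.weight(1), Some(1));
        assert_eq!(weights.max_weight(), Some(1));
        assert_eq!(weights.ids().collect::<Vec<_>>(), vec![0, 1]);
    }

The new ids() iterator is what the snapshot helper added later in this commit uses to list every field id that carries a weight.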

View File

@@ -28,7 +28,7 @@ use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap,
     GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec,
-    BEU16, BEU32, BEU64, Weight,
+    BEU16, BEU32, BEU64,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -443,6 +443,27 @@ impl Index {
             .unwrap_or_default())
     }

+    pub fn searchable_fields_and_weights<'a>(
+        &self,
+        rtxn: &'a RoTxn,
+    ) -> heed::Result<Vec<(Cow<'a, str>, FieldId, Weight)>> {
+        let fid_map = self.fields_ids_map(rtxn)?;
+        let weight_map = self.fieldids_weights_map(rtxn)?;
+        let searchable = self.searchable_fields(rtxn)?;
+
+        Ok(searchable
+            .into_iter()
+            .map(|field| {
+                // the searchable attributes are a subset of the field id map
+                let fid = fid_map.id(&field).unwrap();
+                // all the searchable fields have a weight
+                let weight = weight_map.weight(fid).unwrap();
+
+                (field, fid, weight)
+            })
+            .collect())
+    }
+
     /* geo rtree */

     /// Writes the provided `rtree` which associates coordinates to documents ids.
@@ -605,9 +626,25 @@ impl Index {
     pub(crate) fn put_all_searchable_fields_from_fields_ids_map(
         &self,
         wtxn: &mut RwTxn,
-        user_fields: &[&str],
+        user_fields: Option<&[&str]>,
         fields_ids_map: &FieldsIdsMap,
     ) -> Result<()> {
+        // Special case if there are no user defined fields:
+        // the whole field id map is marked as searchable.
+        if user_fields.is_none() {
+            let mut weights = self.fieldids_weights_map(&wtxn)?;
+            let mut searchable = Vec::new();
+            for (weight, (fid, name)) in fields_ids_map.iter().enumerate() {
+                searchable.push(name);
+                weights.insert(fid, weight as u16);
+            }
+            self.put_searchable_fields(wtxn, &searchable)?;
+            self.put_fieldids_weights_map(wtxn, &weights)?;
+
+            return Ok(());
+        }
+
+        let user_fields = user_fields.unwrap();
+
         // We can write the user defined searchable fields as-is.
         self.put_user_defined_searchable_fields(wtxn, user_fields)?;
@@ -617,13 +654,13 @@ impl Index {
         // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
         // 2. Iterate over the user defined searchable fields.
         // 3. If a user defined field is a subset of a field defined in the fields_ids_map
-        // (ie doggo.name is a subset of doggo) then we push it at the end of the fields.
-        let mut real_fields = user_fields.to_vec();
+        // (ie doggo.name is a subset of doggo) then we push it right after doggo, with the same weight.
+        let mut real_fields = Vec::new();

         for (id, field_from_map) in fields_ids_map.iter() {
             for (weight, user_field) in user_fields.iter().enumerate() {
                 if crate::is_faceted_by(field_from_map, user_field)
-                    && !user_fields.contains(&field_from_map)
+                    && !real_fields.contains(&field_from_map)
                 {
                     real_fields.push(field_from_map);
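The numbered comments above describe how weights follow the user-defined order and how nested fields inherit their parent's weight. Here is a standalone worked example of that assignment; is_subfield_of is a simplified stand-in for crate::is_faceted_by and the field names are invented:

    // Standalone illustration of the weight assignment, not milli's actual code.
    fn is_subfield_of(field: &str, parent: &str) -> bool {
        field == parent || field.starts_with(&format!("{parent}."))
    }

    fn main() {
        // User-defined searchable fields, in priority order.
        let user_fields = ["title", "doggo"];
        // Fields discovered in the documents, in field-id order.
        let fields_in_map = ["title", "doggo", "doggo.name", "doggo.age", "id"];

        let mut weights = Vec::new();
        for field in fields_in_map {
            for (weight, user_field) in user_fields.iter().enumerate() {
                if is_subfield_of(field, user_field) && !weights.iter().any(|(f, _)| *f == field) {
                    weights.push((field, weight as u16));
                }
            }
        }

        // `doggo.name` and `doggo.age` are nested under `doggo`, so they share doggo's weight;
        // `id` is not searchable, so it gets no weight at all.
        assert_eq!(
            weights,
            vec![("title", 0), ("doggo", 1), ("doggo.name", 1), ("doggo.age", 1)]
        );
    }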
@@ -2427,6 +2464,14 @@ pub(crate) mod tests {
         11 0
         4  1
         "###);
+        db_snap!(index, fields_ids_map, @r###"
+        0 primary_key |
+        "###);
+        db_snap!(index, searchable_fields, @r###"["primary_key"]"###);
+        db_snap!(index, fieldids_weights_map, @r###"
+        fid weight
+        0   0  |
+        "###);

         index
             .add_documents(documents!([
@@ -2442,6 +2487,16 @@ pub(crate) mod tests {
         11 0
         4  1
         "###);
+        db_snap!(index, fields_ids_map, @r###"
+        0 primary_key |
+        1 a           |
+        "###);
+        db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
+        db_snap!(index, fieldids_weights_map, @r###"
+        fid weight
+        0   0  |
+        1   1  |
+        "###);

         index.delete_documents(Default::default());
@@ -2452,6 +2507,16 @@ pub(crate) mod tests {
         11 0
         4  1
         "###);
+        db_snap!(index, fields_ids_map, @r###"
+        0 primary_key |
+        1 a           |
+        "###);
+        db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
+        db_snap!(index, fieldids_weights_map, @r###"
+        fid weight
+        0   0  |
+        1   1  |
+        "###);

         index
             .add_documents(documents!([
@@ -2467,6 +2532,16 @@ pub(crate) mod tests {
         11 0
         4  1
         "###);
+        db_snap!(index, fields_ids_map, @r###"
+        0 primary_key |
+        1 a           |
+        "###);
+        db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###);
+        db_snap!(index, fieldids_weights_map, @r###"
+        fid weight
+        0   0  |
+        1   1  |
+        "###);

         let rtxn = index.read_txn().unwrap();
         let search = Search::new(&rtxn, &index);

View File

@@ -147,7 +147,7 @@ impl<'a> Search<'a> {
     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
         if has_vector_search {
-            let ctx = SearchContext::new(self.index, self.rtxn);
+            let ctx = SearchContext::new(self.index, self.rtxn)?;
             filtered_universe(&ctx, &self.filter)
         } else {
             Ok(self.execute()?.candidates)
@@ -155,7 +155,7 @@ impl<'a> Search<'a> {
     }

     pub fn execute(&self) -> Result<SearchResult> {
-        let mut ctx = SearchContext::new(self.index, self.rtxn);
+        let mut ctx = SearchContext::new(self.index, self.rtxn)?;

         if let Some(searchable_attributes) = self.searchable_attributes {
             ctx.searchable_attributes(searchable_attributes)?;

View File

@@ -159,58 +159,36 @@ impl<'ctx> SearchContext<'ctx> {
     /// Retrieve or insert the given value in the `word_docids` database.
     fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(word).as_str();
-                let keys: Vec<_> =
-                    restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    word,
-                    &keys[..],
-                    &mut self.db_cache.word_docids,
-                    self.index.word_fid_docids.remap_data_type::<Bytes>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-                self.txn,
-                word,
-                self.word_interner.get(word).as_str(),
-                &mut self.db_cache.word_docids,
-                self.index.word_docids.remap_data_type::<Bytes>(),
-            ),
-        }
+        let interned = self.word_interner.get(word).as_str();
+        let keys: Vec<_> =
+            self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect();
+
+        DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+            self.txn,
+            word,
+            &keys[..],
+            &mut self.db_cache.word_docids,
+            self.index.word_fid_docids.remap_data_type::<Bytes>(),
+            merge_cbo_roaring_bitmaps,
+        )
     }

     fn get_db_exact_word_docids(
         &mut self,
         word: Interned<String>,
     ) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(word).as_str();
-                let keys: Vec<_> =
-                    restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    word,
-                    &keys[..],
-                    &mut self.db_cache.exact_word_docids,
-                    self.index.word_fid_docids.remap_data_type::<Bytes>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-                self.txn,
-                word,
-                self.word_interner.get(word).as_str(),
-                &mut self.db_cache.exact_word_docids,
-                self.index.exact_word_docids.remap_data_type::<Bytes>(),
-            ),
-        }
+        let interned = self.word_interner.get(word).as_str();
+        let keys: Vec<_> =
+            self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect();
+
+        DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+            self.txn,
+            word,
+            &keys[..],
+            &mut self.db_cache.exact_word_docids,
+            self.index.word_fid_docids.remap_data_type::<Bytes>(),
+            merge_cbo_roaring_bitmaps,
+        )
     }

     pub fn word_prefix_docids(&mut self, prefix: Word) -> Result<Option<RoaringBitmap>> {
@@ -238,58 +216,36 @@ impl<'ctx> SearchContext<'ctx> {
         &mut self,
         prefix: Interned<String>,
     ) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(prefix).as_str();
-                let keys: Vec<_> =
-                    restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    prefix,
-                    &keys[..],
-                    &mut self.db_cache.word_prefix_docids,
-                    self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-                self.txn,
-                prefix,
-                self.word_interner.get(prefix).as_str(),
-                &mut self.db_cache.word_prefix_docids,
-                self.index.word_prefix_docids.remap_data_type::<Bytes>(),
-            ),
-        }
+        let interned = self.word_interner.get(prefix).as_str();
+        let keys: Vec<_> =
+            self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect();
+
+        DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+            self.txn,
+            prefix,
+            &keys[..],
+            &mut self.db_cache.word_prefix_docids,
+            self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
+            merge_cbo_roaring_bitmaps,
+        )
     }

     fn get_db_exact_word_prefix_docids(
         &mut self,
         prefix: Interned<String>,
     ) -> Result<Option<RoaringBitmap>> {
-        match &self.restricted_fids {
-            Some(restricted_fids) => {
-                let interned = self.word_interner.get(prefix).as_str();
-                let keys: Vec<_> =
-                    restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect();
-
-                DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
-                    self.txn,
-                    prefix,
-                    &keys[..],
-                    &mut self.db_cache.exact_word_prefix_docids,
-                    self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
-                    merge_cbo_roaring_bitmaps,
-                )
-            }
-            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-                self.txn,
-                prefix,
-                self.word_interner.get(prefix).as_str(),
-                &mut self.db_cache.exact_word_prefix_docids,
-                self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
-            ),
-        }
+        let interned = self.word_interner.get(prefix).as_str();
+        let keys: Vec<_> =
+            self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect();
+
+        DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>(
+            self.txn,
+            prefix,
+            &keys[..],
+            &mut self.db_cache.exact_word_prefix_docids,
+            self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
+            merge_cbo_roaring_bitmaps,
+        )
     }

     pub fn get_db_word_pair_proximity_docids(
@@ -465,8 +421,8 @@ impl<'ctx> SearchContext<'ctx> {
         word: Interned<String>,
         fid: u16,
     ) -> Result<Option<RoaringBitmap>> {
-        // if the requested fid isn't in the restricted list, return None.
-        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
+        // if the requested fid isn't in the list of searchable fields, return None.
+        if !self.searchable_fids.contains(&fid) {
             return Ok(None);
         }
@@ -484,8 +440,8 @@ impl<'ctx> SearchContext<'ctx> {
         word_prefix: Interned<String>,
         fid: u16,
     ) -> Result<Option<RoaringBitmap>> {
-        // if the requested fid isn't in the restricted list, return None.
-        if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
+        // if the requested fid isn't in the searchable list, return None.
+        if !self.searchable_fids.contains(&fid) {
             return Ok(None);
         }
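The lookups above collect one (word, fid) key per searchable field and let the cache merge the per-field bitmaps into a single docids bitmap. Below is a standalone sketch of that union with the roaring crate; the real code goes through merge_cbo_roaring_bitmaps and the LMDB word_fid_docids database, which are not reproduced here, and the docids are invented:

    use roaring::RoaringBitmap;

    fn main() {
        // Pretend these are the docids stored under ("hello", fid) for two searchable fields.
        let title_docids: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
        let overview_docids: RoaringBitmap = [3u32, 7].into_iter().collect();

        // Union the per-field bitmaps into a single word-level bitmap,
        // which is what the cache ends up storing for the word.
        let mut merged = RoaringBitmap::new();
        for bitmap in [title_docids, overview_docids] {
            merged |= bitmap;
        }

        assert_eq!(merged.iter().collect::<Vec<u32>>(), vec![1, 2, 3, 7]);
    }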

View File

@@ -258,7 +258,7 @@ pub(crate) mod tests {
     fn matching_words() {
         let temp_index = temp_index_with_documents();
         let rtxn = temp_index.read_txn().unwrap();
-        let mut ctx = SearchContext::new(&temp_index, &rtxn);
+        let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
         let mut builder = TokenizerBuilder::default();
         let tokenizer = builder.build();
         let tokens = tokenizer.tokenize("split this world");

View File

@@ -506,7 +506,7 @@ mod tests {
     impl<'a> MatcherBuilder<'a> {
         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
-            let mut ctx = SearchContext::new(index, rtxn);
+            let mut ctx = SearchContext::new(index, rtxn).unwrap();
             let universe = filtered_universe(&ctx, &None).unwrap();
             let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
                 &mut ctx,

View File

@@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
 use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use self::vector_sort::VectorSort;
-use crate::error::FieldIdMapMissingEntry;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::vector::Embedder;
 use crate::{
     AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
-    UserError,
+    UserError, Weight,
 };

 /// A structure used throughout the execution of a search query.
@@ -67,12 +66,25 @@ pub struct SearchContext<'ctx> {
     pub phrase_interner: DedupInterner<Phrase>,
     pub term_interner: Interner<QueryTerm>,
     pub phrase_docids: PhraseDocIdsCache,
-    pub restricted_fids: Option<RestrictedFids>,
+    pub searchable_fids: SearchableFids,
 }

 impl<'ctx> SearchContext<'ctx> {
-    pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self {
-        Self {
+    pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
+        let searchable_fids = index.searchable_fields_and_weights(txn)?;
+        let exact_attributes_ids = index.exact_attributes_ids(txn)?;
+
+        let mut exact = Vec::new();
+        let mut tolerant = Vec::new();
+        for (name, fid, weight) in searchable_fids {
+            if exact_attributes_ids.contains(&fid) {
+                exact.push((fid, weight));
+            } else {
+                tolerant.push((fid, weight));
+            }
+        }
+
+        Ok(Self {
             index,
             txn,
             db_cache: <_>::default(),
@@ -80,38 +92,32 @@ impl<'ctx> SearchContext<'ctx> {
             phrase_interner: <_>::default(),
             term_interner: <_>::default(),
             phrase_docids: <_>::default(),
-            restricted_fids: None,
-        }
+            searchable_fids: SearchableFids { tolerant, exact },
+        })
     }

-    pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> {
+    // TODO: TAMO continue here
+    pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> {
+        if attributes_to_search_on.contains(&String::from("*")) {
+            return Ok(());
+        }
+
         let fids_map = self.index.fields_ids_map(self.txn)?;
-        let searchable_names = self.index.searchable_fields(self.txn)?;
+        let searchable_names = self.index.searchable_fields_and_weights(self.txn)?;
         let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;

-        let mut restricted_fids = RestrictedFids::default();
-        let mut contains_wildcard = false;
-        for field_name in searchable_attributes {
-            if field_name == "*" {
-                contains_wildcard = true;
-                continue;
-            }
-            let searchable_contains_name = searchable_names.iter().any(|name| name == field_name);
-            let fid = match (fids_map.id(field_name), searchable_contains_name) {
+        let mut restricted_fids = SearchableFids::default();
+        for field_name in attributes_to_search_on {
+            let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name);
+            let (fid, weight) = match searchable_weight {
                 // The Field id exist and the field is searchable
-                (Some(fid), true) => fid,
-                // The field is searchable but the Field id doesn't exist => Internal Error
-                (None, true) => {
-                    return Err(FieldIdMapMissingEntry::FieldName {
-                        field_name: field_name.to_string(),
-                        process: "search",
-                    }
-                    .into())
-                }
+                Some((_name, fid, weight)) => (*fid, *weight),
                 // The field is not searchable => User error
-                (_fid, false) => {
-                    let (valid_fields, hidden_fields) =
-                        self.index.remove_hidden_fields(self.txn, searchable_names)?;
+                None => {
+                    let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
+                        self.txn,
+                        searchable_names.iter().map(|(name, _, _)| name),
+                    )?;

                     let field = field_name.to_string();
                     return Err(UserError::InvalidSearchableAttribute {
@@ -124,13 +130,13 @@ impl<'ctx> SearchContext<'ctx> {
             };

             if exact_attributes_ids.contains(&fid) {
-                restricted_fids.exact.push(fid);
+                restricted_fids.exact.push((fid, weight));
             } else {
-                restricted_fids.tolerant.push(fid);
+                restricted_fids.tolerant.push((fid, weight));
             };
         }

-        self.restricted_fids = (!contains_wildcard).then_some(restricted_fids);
+        self.searchable_fids = restricted_fids;

         Ok(())
     }
@@ -152,14 +158,15 @@ impl Word {
 }

 #[derive(Debug, Clone, Default)]
-pub struct RestrictedFids {
-    pub tolerant: Vec<FieldId>,
-    pub exact: Vec<FieldId>,
+pub struct SearchableFids {
+    pub tolerant: Vec<(FieldId, Weight)>,
+    pub exact: Vec<(FieldId, Weight)>,
 }

-impl RestrictedFids {
+impl SearchableFids {
     pub fn contains(&self, fid: &FieldId) -> bool {
-        self.tolerant.contains(fid) || self.exact.contains(fid)
+        self.tolerant.iter().find(|(id, _)| id == fid).is_some()
+            || self.exact.iter().find(|(id, _)| id == fid).is_some()
     }
 }
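To make the renamed type concrete, here is a standalone sketch of the tolerant/exact split and of the new contains behaviour: since the full searchable set is now always materialized, a field id outside it is rejected instead of being accepted by an unrestricted default. The ids, weights and u16 aliases are invented for the example:

    // Minimal sketch mirroring SearchableFids; not milli's actual types.
    type FieldId = u16;
    type Weight = u16;

    #[derive(Default)]
    struct SearchableFids {
        tolerant: Vec<(FieldId, Weight)>,
        exact: Vec<(FieldId, Weight)>,
    }

    impl SearchableFids {
        fn contains(&self, fid: &FieldId) -> bool {
            self.tolerant.iter().any(|(id, _)| id == fid)
                || self.exact.iter().any(|(id, _)| id == fid)
        }
    }

    fn main() {
        // (fid, weight) pairs for the searchable fields; fid 1 is an exact attribute.
        let searchable = [(0u16, 0u16), (1, 1), (2, 2)];
        let exact_attributes_ids = [1u16];

        let mut fids = SearchableFids::default();
        for (fid, weight) in searchable {
            if exact_attributes_ids.contains(&fid) {
                fids.exact.push((fid, weight));
            } else {
                fids.tolerant.push((fid, weight));
            }
        }

        assert!(fids.contains(&2));
        // A field id that is not searchable is now rejected outright.
        assert!(!fids.contains(&42));
    }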

View File

@@ -366,7 +366,7 @@ mod tests {
         let tokens = tokenizer.tokenize(".");

         let index = temp_index_with_documents();
         let rtxn = index.read_txn()?;
-        let mut ctx = SearchContext::new(&index, &rtxn);
+        let mut ctx = SearchContext::new(&index, &rtxn)?;
         // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
         let ExtractedTokens { query_terms, .. } =
             located_query_terms_from_tokens(&mut ctx, tokens, None)?;

View File

@@ -1,5 +1,5 @@
 use crate::index::tests::TempIndex;
-use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
+use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};

 fn create_index() -> TempIndex {
     let index = TempIndex::new();
@@ -131,6 +131,19 @@ fn test_attribute_fid_simple() {
 #[test]
 fn test_attribute_fid_ngrams() {
     let index = create_index();
+    db_snap!(index, fields_ids_map, @r###"
+    0 title       |
+    1 description |
+    2 plot        |
+    3 id          |
+    "###);
+    db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###);
+    db_snap!(index, fieldids_weights_map, @r###"
+    fid weight
+    0   0  |
+    1   1  |
+    2   2  |
+    "###);

     let txn = index.read_txn().unwrap();

View File

@@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String {
     }
     snap
 }
+pub fn snap_fieldids_weights_map(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let weights_map = index.fieldids_weights_map(&rtxn).unwrap();
+
+    let mut snap = String::new();
+    writeln!(&mut snap, "fid weight").unwrap();
+    let mut field_ids: Vec<_> = weights_map.ids().collect();
+    field_ids.sort();
+    for field_id in field_ids {
+        let weight = weights_map.weight(field_id).unwrap();
+        writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap();
+    }
+    snap
+}
+
+pub fn snap_searchable_fields(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let searchable_fields = index.searchable_fields(&rtxn).unwrap();
+    format!("{searchable_fields:?}")
+}
 pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
     let rtxn = index.read_txn().unwrap();
     let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap();
@@ -469,6 +488,12 @@ macro_rules! full_snap_of_db {
     ($index:ident, fields_ids_map) => {{
         $crate::snapshot_tests::snap_fields_ids_map(&$index)
     }};
+    ($index:ident, fieldids_weights_map) => {{
+        $crate::snapshot_tests::snap_fieldids_weights_map(&$index)
+    }};
+    ($index:ident, searchable_fields) => {{
+        $crate::snapshot_tests::snap_searchable_fields(&$index)
+    }};
     ($index:ident, geo_faceted_documents_ids) => {{
         $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
     }};

View File

@@ -496,7 +496,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                 self.index.put_all_searchable_fields_from_fields_ids_map(
                     self.wtxn,
-                    &names,
+                    Some(&names),
                     &new_fields_ids_map,
                 )?;
                 self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?;
@@ -1228,18 +1228,19 @@ impl InnerIndexSettings {
     // find and insert the new field ids
     pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> {
+        let searchable_fields = self
+            .user_defined_searchable_fields
+            .as_ref()
+            .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
+
         // in case new fields were introduced we're going to recreate the searchable fields.
-        if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() {
-            let searchable_fields =
-                searchable_fields.iter().map(String::as_ref).collect::<Vec<_>>();
-            index.put_all_searchable_fields_from_fields_ids_map(
-                wtxn,
-                &searchable_fields,
-                &self.fields_ids_map,
-            )?;
-            let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
-            self.searchable_fields_ids = searchable_fields_ids;
-        }
+        index.put_all_searchable_fields_from_fields_ids_map(
+            wtxn,
+            searchable_fields.as_deref(),
+            &self.fields_ids_map,
+        )?;
+        let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
+        self.searchable_fields_ids = searchable_fields_ids;

         Ok(())
     }