add exact words setting

This commit is contained in:
ad hoc 2022-03-21 14:03:31 +01:00
parent 48a5ce7434
commit 9bbffb8fee
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 49 additions and 0 deletions

View File

@ -52,6 +52,7 @@ pub mod main_key {
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
pub const EXACT_WORDS: &str = "exact-words";
}
pub mod db_name {
@ -927,6 +928,27 @@ impl Index {
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
Ok(())
}
/// Returns the set of words on which typos are not allowed.
///
/// The set is read from the main database under `main_key::EXACT_WORDS`.
/// When an entry exists, the returned set borrows the stored bytes for the
/// lifetime of `txn`; when there is no entry, a default (empty) owned set is
/// returned instead.
pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
    let stored = self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)?;
    let exact: fst::Set<Cow<'t, [u8]>> = if let Some(raw) = stored {
        // Wrap the bytes held by the transaction without copying them.
        fst::Set::new(raw)?.map_data(Cow::Borrowed)?
    } else {
        // Nothing stored yet: fall back to the default set.
        fst::Set::default().map_data(Cow::Owned)?
    };
    Ok(exact)
}
/// Stores `words` as the exact-words set.
///
/// The raw bytes of the underlying fst are written to the main database
/// under `main_key::EXACT_WORDS`, replacing whatever was stored there.
pub(crate) fn put_exact_words<A: AsRef<[u8]>>(
    &self,
    txn: &mut RwTxn,
    words: &fst::Set<A>,
) -> Result<()> {
    // The set is persisted as the byte representation of its fst.
    let serialized = words.as_fst().as_bytes();
    self.main.put::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS, serialized)?;
    Ok(())
}
}
#[cfg(test)]

View File

@ -92,6 +92,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
authorize_typos: Setting<bool>,
min_word_len_two_typos: Setting<u8>,
min_word_len_one_typo: Setting<u8>,
exact_words: Setting<Vec<String>>,
}
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@ -113,6 +114,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet,
exact_words: Setting::NotSet,
indexer_config,
min_word_len_two_typos: Setting::Reset,
min_word_len_one_typo: Setting::Reset,
@ -216,6 +218,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.min_word_len_one_typo = Setting::Reset;
}
/// Records `words` as the new value requested for the exact-words setting.
pub fn set_exact_words(&mut self, words: Vec<String>) {
    let requested = Setting::Set(words);
    self.exact_words = requested;
}
/// Marks the exact-words setting as `Setting::Reset`.
pub fn reset_exact_words(&mut self) {
    let cleared = Setting::Reset;
    self.exact_words = cleared;
}
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
where
F: Fn(UpdateIndexingStep) + Sync,
@ -526,6 +536,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
/// Applies the pending exact-words setting to the index.
///
/// * `Setting::Set`: the word list is sorted in place, turned into an
///   `fst::Set` and stored in the index.
/// * `Setting::Reset`: a default (empty) set is stored.
/// * `Setting::NotSet`: nothing is written.
fn update_exact_words(&mut self) -> Result<()> {
    let exact_set: fst::Set<Vec<u8>> = match self.exact_words {
        // No change requested: leave the stored value untouched.
        Setting::NotSet => return Ok(()),
        Setting::Reset => fst::Set::default(),
        Setting::Set(ref mut list) => {
            // The fst builder expects its keys in lexicographic order.
            list.sort_unstable();
            fst::Set::from_iter(list)?
        }
    };
    self.index.put_exact_words(&mut self.wtxn, &exact_set)?;
    Ok(())
}
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
where
F: Fn(UpdateIndexingStep) + Sync,
@ -543,6 +569,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.update_primary_key()?;
self.update_authorize_typos()?;
self.update_min_typo_word_len()?;
self.update_exact_words()?;
// If there is new faceted fields we indicate that we must reindex as we must
// index new fields as facets. It means that the distinct attribute,