optimize 2 typos case

This commit is contained in:
mpostma 2022-01-20 23:23:07 +01:00 committed by ad hoc
parent 55e6cb9c7b
commit d0aabde502
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643

View File

@ -285,9 +285,27 @@ pub fn word_derivations<'c>(
Entry::Occupied(entry) => Ok(entry.into_mut()), Entry::Occupied(entry) => Ok(entry.into_mut()),
Entry::Vacant(entry) => { Entry::Vacant(entry) => {
let mut derived_words = Vec::new(); let mut derived_words = Vec::new();
let dfa = build_dfa(word, max_typo, is_prefix); if max_typo == 0 {
if is_prefix {
let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream();
while let Some(word) = stream.next() {
let word = std::str::from_utf8(word)?;
derived_words.push((word.to_string(), 0));
}
} else {
let automaton = Str::new(word);
let mut stream = fst.search(automaton).into_stream();
while let Some(word) = stream.next() {
let word = std::str::from_utf8(word)?;
derived_words.push((word.to_string(), 0));
}
}
} else {
if max_typo == 1 { if max_typo == 1 {
let starts = Str::new(get_first(word)); let dfa = build_dfa(word, 1, is_prefix);
let starts = Str::new(get_first(word)).starts_with();
let mut stream = fst.search_with_state(starts.intersection(&dfa)).into_stream(); let mut stream = fst.search_with_state(starts.intersection(&dfa)).into_stream();
while let Some((word, state)) = stream.next() { while let Some((word, state)) = stream.next() {
@ -295,21 +313,23 @@ pub fn word_derivations<'c>(
let distance = dfa.distance(state.1); let distance = dfa.distance(state.1);
derived_words.push((word.to_string(), distance.to_u8())); derived_words.push((word.to_string(), distance.to_u8()));
} }
Ok(entry.insert(derived_words))
} else { } else {
let mut stream = fst.search_with_state(&dfa).into_stream(); let starts = Str::new(get_first(word)).starts_with();
let first = build_dfa(word, 1, is_prefix).intersection((&starts).complement());
let second = build_dfa(word, 2, is_prefix).intersection(&starts);
let automaton = first.union(second);
while let Some((word, state)) = stream.next() { let mut stream = fst.search(automaton).into_stream();
while let Some(word) = stream.next() {
let word = std::str::from_utf8(word)?; let word = std::str::from_utf8(word)?;
let distance = dfa.distance(state); derived_words.push((word.to_string(), 2));
derived_words.push((word.to_string(), distance.to_u8())); }
}
} }
Ok(entry.insert(derived_words)) Ok(entry.insert(derived_words))
} }
} }
}
} }
fn get_first(s: &str) -> &str { fn get_first(s: &str) -> &str {