mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-19 01:18:31 +08:00
Remove the useless threshold when computing the word prefix pair proximity
This commit is contained in:
parent
e3c34684c6
commit
23ea3ad738
@ -18,7 +18,6 @@ pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
pub(crate) chunk_compression_level: Option<u32>,
|
pub(crate) chunk_compression_level: Option<u32>,
|
||||||
pub(crate) max_nb_chunks: Option<usize>,
|
pub(crate) max_nb_chunks: Option<usize>,
|
||||||
pub(crate) max_memory: Option<usize>,
|
pub(crate) max_memory: Option<usize>,
|
||||||
threshold: u32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
||||||
@ -33,21 +32,9 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
chunk_compression_level: None,
|
chunk_compression_level: None,
|
||||||
max_nb_chunks: None,
|
max_nb_chunks: None,
|
||||||
max_memory: None,
|
max_memory: None,
|
||||||
threshold: 100,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set the number of words required to make a prefix be part of the words prefixes
|
|
||||||
/// database. If a word prefix is supposed to match more than this number of words in the
|
|
||||||
/// dictionary, then this prefix is added to the words prefixes data structures.
|
|
||||||
///
|
|
||||||
/// Default value is 100. This value must be at least 50 and will be clamped
|
|
||||||
/// to this bound otherwise.
|
|
||||||
pub fn threshold(&mut self, value: u32) -> &mut Self {
|
|
||||||
self.threshold = value.max(50);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
#[logging_timer::time("WordPrefixPairProximityDocids::{}")]
|
||||||
pub fn execute(self) -> Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
@ -81,7 +68,6 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
write_prefixes_in_sorter(
|
write_prefixes_in_sorter(
|
||||||
&mut prefixes_cache,
|
&mut prefixes_cache,
|
||||||
&mut word_prefix_pair_proximity_docids_sorter,
|
&mut word_prefix_pair_proximity_docids_sorter,
|
||||||
self.threshold,
|
|
||||||
)?;
|
)?;
|
||||||
prefix_fst_keys.iter().find(|prefixes| w2.starts_with(&prefixes[0]))
|
prefix_fst_keys.iter().find(|prefixes| w2.starts_with(&prefixes[0]))
|
||||||
}
|
}
|
||||||
@ -109,7 +95,6 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
write_prefixes_in_sorter(
|
write_prefixes_in_sorter(
|
||||||
&mut prefixes_cache,
|
&mut prefixes_cache,
|
||||||
&mut word_prefix_pair_proximity_docids_sorter,
|
&mut word_prefix_pair_proximity_docids_sorter,
|
||||||
self.threshold,
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
drop(prefix_fst);
|
drop(prefix_fst);
|
||||||
@ -131,15 +116,10 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
|
|||||||
fn write_prefixes_in_sorter(
|
fn write_prefixes_in_sorter(
|
||||||
prefixes: &mut HashMap<Vec<u8>, Vec<&[u8]>>,
|
prefixes: &mut HashMap<Vec<u8>, Vec<&[u8]>>,
|
||||||
sorter: &mut grenad::Sorter<MergeFn>,
|
sorter: &mut grenad::Sorter<MergeFn>,
|
||||||
min_word_per_prefix: u32,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for (key, data_slices) in prefixes.drain() {
|
for (key, data_slices) in prefixes.drain() {
|
||||||
// if the number of words prefixed by the prefix is higher than the threshold,
|
for data in data_slices {
|
||||||
// we insert it in the sorter.
|
sorter.insert(&key, data)?;
|
||||||
if data_slices.len() > min_word_per_prefix as usize {
|
|
||||||
for data in data_slices {
|
|
||||||
sorter.insert(&key, data)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user