mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-26 03:55:07 +08:00
Use OnceCell to cache levenshtein builders
This commit is contained in:
parent
dde3e01a59
commit
dff68a339a
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -793,6 +793,7 @@ dependencies = [
|
||||
"jemallocator",
|
||||
"levenshtein_automata",
|
||||
"memmap",
|
||||
"once_cell",
|
||||
"oxidized-mtbl",
|
||||
"rayon",
|
||||
"roaring",
|
||||
|
@ -17,6 +17,7 @@ heed = { version = "0.8.0", default-features = false, features = ["lmdb"] }
|
||||
jemallocator = "0.3.2"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
memmap = "0.7.0"
|
||||
once_cell = "1.4.0"
|
||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "8918476" }
|
||||
rayon = "1.3.0"
|
||||
roaring = "0.5.2"
|
||||
|
@ -190,13 +190,15 @@ fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
||||
.or_insert_with(RoaringBitmap::new)
|
||||
.insert(document_id);
|
||||
if let Some(prefix) = word.as_bytes().get(0..word.len().min(4)) {
|
||||
prefix_postings_ids.entry(SmallVec32::from(prefix))
|
||||
for i in 0..prefix.len() {
|
||||
prefix_postings_ids.entry(SmallVec32::from(&prefix[..i]))
|
||||
.or_insert_with(RoaringBitmap::new)
|
||||
.insert(document_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We write the document in the database.
|
||||
let mut writer = csv::WriterBuilder::new().has_headers(false).from_writer(Vec::new());
|
||||
|
22
src/lib.rs
22
src/lib.rs
@ -7,10 +7,15 @@ use fst::{IntoStreamer, Streamer};
|
||||
use fxhash::FxHasher32;
|
||||
use heed::types::*;
|
||||
use heed::{PolyDatabase, Database};
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use once_cell::sync::OnceCell;
|
||||
use roaring::RoaringBitmap;
|
||||
use slice_group_by::StrGroupBy;
|
||||
|
||||
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
|
||||
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
|
||||
|
||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
||||
pub type SmallVec32 = smallvec::SmallVec<[u8; 32]>;
|
||||
@ -37,12 +42,7 @@ impl Index {
|
||||
let prefix_postings_ids = env.create_database(Some("prefix-postings-ids"))?;
|
||||
let documents = env.create_database(Some("documents"))?;
|
||||
|
||||
Ok(Index {
|
||||
main,
|
||||
postings_ids,
|
||||
prefix_postings_ids,
|
||||
documents,
|
||||
})
|
||||
Ok(Index { main, postings_ids, prefix_postings_ids, documents })
|
||||
}
|
||||
|
||||
pub fn headers<'t>(&self, rtxn: &'t heed::RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
||||
@ -56,9 +56,9 @@ impl Index {
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
let lev0 = LevenshteinAutomatonBuilder::new(0, true);
|
||||
let lev1 = LevenshteinAutomatonBuilder::new(1, true);
|
||||
let lev2 = LevenshteinAutomatonBuilder::new(2, true);
|
||||
let lev0 = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||
let lev1 = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
|
||||
let lev2 = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
|
||||
|
||||
let words: Vec<_> = alphanumeric_tokens(query).collect();
|
||||
let number_of_words = words.len();
|
||||
@ -91,8 +91,8 @@ impl Index {
|
||||
union_result.union_with(&right);
|
||||
}
|
||||
}
|
||||
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
||||
}
|
||||
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
||||
|
||||
intersect_result = match intersect_result.take() {
|
||||
Some(mut left) => {
|
||||
|
Loading…
Reference in New Issue
Block a user