mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-29 16:45:30 +08:00
Use OnceCell to cache levenshtein builders
This commit is contained in:
parent
dde3e01a59
commit
dff68a339a
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -793,6 +793,7 @@ dependencies = [
|
|||||||
"jemallocator",
|
"jemallocator",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
"memmap",
|
"memmap",
|
||||||
|
"once_cell",
|
||||||
"oxidized-mtbl",
|
"oxidized-mtbl",
|
||||||
"rayon",
|
"rayon",
|
||||||
"roaring",
|
"roaring",
|
||||||
|
@ -17,6 +17,7 @@ heed = { version = "0.8.0", default-features = false, features = ["lmdb"] }
|
|||||||
jemallocator = "0.3.2"
|
jemallocator = "0.3.2"
|
||||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
|
once_cell = "1.4.0"
|
||||||
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "8918476" }
|
oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "8918476" }
|
||||||
rayon = "1.3.0"
|
rayon = "1.3.0"
|
||||||
roaring = "0.5.2"
|
roaring = "0.5.2"
|
||||||
|
@ -190,13 +190,15 @@ fn index_csv(mut rdr: csv::Reader<File>) -> anyhow::Result<MtblKvStore> {
|
|||||||
.or_insert_with(RoaringBitmap::new)
|
.or_insert_with(RoaringBitmap::new)
|
||||||
.insert(document_id);
|
.insert(document_id);
|
||||||
if let Some(prefix) = word.as_bytes().get(0..word.len().min(4)) {
|
if let Some(prefix) = word.as_bytes().get(0..word.len().min(4)) {
|
||||||
prefix_postings_ids.entry(SmallVec32::from(prefix))
|
for i in 0..prefix.len() {
|
||||||
|
prefix_postings_ids.entry(SmallVec32::from(&prefix[..i]))
|
||||||
.or_insert_with(RoaringBitmap::new)
|
.or_insert_with(RoaringBitmap::new)
|
||||||
.insert(document_id);
|
.insert(document_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We write the document in the database.
|
// We write the document in the database.
|
||||||
let mut writer = csv::WriterBuilder::new().has_headers(false).from_writer(Vec::new());
|
let mut writer = csv::WriterBuilder::new().has_headers(false).from_writer(Vec::new());
|
||||||
|
22
src/lib.rs
22
src/lib.rs
@ -7,10 +7,15 @@ use fst::{IntoStreamer, Streamer};
|
|||||||
use fxhash::FxHasher32;
|
use fxhash::FxHasher32;
|
||||||
use heed::types::*;
|
use heed::types::*;
|
||||||
use heed::{PolyDatabase, Database};
|
use heed::{PolyDatabase, Database};
|
||||||
use levenshtein_automata::LevenshteinAutomatonBuilder;
|
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||||
|
use once_cell::sync::OnceCell;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use slice_group_by::StrGroupBy;
|
use slice_group_by::StrGroupBy;
|
||||||
|
|
||||||
|
static LEVDIST0: OnceCell<LevBuilder> = OnceCell::new();
|
||||||
|
static LEVDIST1: OnceCell<LevBuilder> = OnceCell::new();
|
||||||
|
static LEVDIST2: OnceCell<LevBuilder> = OnceCell::new();
|
||||||
|
|
||||||
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
|
||||||
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
|
||||||
pub type SmallVec32 = smallvec::SmallVec<[u8; 32]>;
|
pub type SmallVec32 = smallvec::SmallVec<[u8; 32]>;
|
||||||
@ -37,12 +42,7 @@ impl Index {
|
|||||||
let prefix_postings_ids = env.create_database(Some("prefix-postings-ids"))?;
|
let prefix_postings_ids = env.create_database(Some("prefix-postings-ids"))?;
|
||||||
let documents = env.create_database(Some("documents"))?;
|
let documents = env.create_database(Some("documents"))?;
|
||||||
|
|
||||||
Ok(Index {
|
Ok(Index { main, postings_ids, prefix_postings_ids, documents })
|
||||||
main,
|
|
||||||
postings_ids,
|
|
||||||
prefix_postings_ids,
|
|
||||||
documents,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn headers<'t>(&self, rtxn: &'t heed::RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
pub fn headers<'t>(&self, rtxn: &'t heed::RoTxn) -> heed::Result<Option<&'t [u8]>> {
|
||||||
@ -56,9 +56,9 @@ impl Index {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
let lev0 = LevenshteinAutomatonBuilder::new(0, true);
|
let lev0 = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
|
||||||
let lev1 = LevenshteinAutomatonBuilder::new(1, true);
|
let lev1 = LEVDIST1.get_or_init(|| LevBuilder::new(1, true));
|
||||||
let lev2 = LevenshteinAutomatonBuilder::new(2, true);
|
let lev2 = LEVDIST2.get_or_init(|| LevBuilder::new(2, true));
|
||||||
|
|
||||||
let words: Vec<_> = alphanumeric_tokens(query).collect();
|
let words: Vec<_> = alphanumeric_tokens(query).collect();
|
||||||
let number_of_words = words.len();
|
let number_of_words = words.len();
|
||||||
@ -91,8 +91,8 @@ impl Index {
|
|||||||
union_result.union_with(&right);
|
union_result.union_with(&right);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
|
||||||
}
|
}
|
||||||
|
eprintln!("union for {:?} took {:.02?}", word, before.elapsed());
|
||||||
|
|
||||||
intersect_result = match intersect_result.take() {
|
intersect_result = match intersect_result.take() {
|
||||||
Some(mut left) => {
|
Some(mut left) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user