Port all tests to the TempDatabase struct

This commit is contained in:
Clément Renault 2019-10-08 16:16:30 +02:00
parent c514692233
commit 175461c13a
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
4 changed files with 986 additions and 825 deletions

View File

@ -9,6 +9,7 @@ use levenshtein_automata::DFA;
use meilidb_tokenizer::{split_query_string, is_cjk};
use crate::store;
use crate::error::MResult;
use self::dfa::{build_dfa, build_prefix_dfa};
use self::query_enhancer::QueryEnhancerBuilder;
@ -24,11 +25,18 @@ impl AutomatonProducer {
/// Builds an `AutomatonProducer` together with the `QueryEnhancer` for `query`.
///
/// All the work is delegated to `generate_automatons`; this constructor only
/// wraps the returned automatons into the producer.
// NOTE(review): this hunk is rendered without +/- markers, so both sides of the
// change appear below: a tuple-returning signature whose body calls `.unwrap()`,
// and an `MResult` signature whose body forwards the error with `?` and also
// passes `main_store`. Presumably the `MResult` variant is the new code — confirm.
pub fn new(
reader: &impl rkv::Readable,
query: &str,
main_store: store::Main,
synonyms_store: store::Synonyms,
) -> (AutomatonProducer, QueryEnhancer)
) -> MResult<(AutomatonProducer, QueryEnhancer)>
{
let (automatons, query_enhancer) = generate_automatons(reader, query, synonyms_store).unwrap();
(AutomatonProducer { automatons }, query_enhancer)
// Any error from `generate_automatons` is returned to the caller instead of panicking.
let (automatons, query_enhancer) = generate_automatons(
reader,
query,
main_store,
synonyms_store,
)?;
Ok((AutomatonProducer { automatons }, query_enhancer))
}
pub fn into_iter(self) -> vec::IntoIter<Vec<Automaton>> {
@ -102,12 +110,16 @@ pub fn normalize_str(string: &str) -> String {
fn generate_automatons(
reader: &impl rkv::Readable,
query: &str,
main_store: store::Main,
synonym_store: store::Synonyms,
) -> Result<(Vec<Vec<Automaton>>, QueryEnhancer), rkv::StoreError>
) -> MResult<(Vec<Vec<Automaton>>, QueryEnhancer)>
{
let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
let synonyms = synonym_store.synonyms_fst(reader)?;
let synonyms = match main_store.synonyms_fst(reader)? {
Some(synonym) => synonym,
None => fst::Set::default(),
};
let mut automaton_index = 0;
let mut automatons = Vec::new();
@ -157,7 +169,7 @@ fn generate_automatons(
let base_nb_words = split_query_string(base).count();
if ngram_nb_words != base_nb_words { continue }
if let Some(synonyms) = synonym_store.alternatives_to(reader, base.as_bytes())? {
if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
let mut stream = synonyms.into_stream();
while let Some(synonyms) = stream.next() {

File diff suppressed because it is too large Load Diff

View File

@ -99,6 +99,33 @@ impl Main {
}
}
/// Stores the raw bytes of the `fst` set as a blob under `SYNONYMS_KEY`.
///
/// # Errors
///
/// Any failure reported by the underlying `put` is converted into the
/// `MResult` error type and returned.
pub fn put_synonyms_fst(
    &self,
    writer: &mut rkv::Writer,
    fst: &fst::Set,
) -> MResult<()>
{
    let raw_bytes = fst.as_fst().as_bytes();
    self.main.put(writer, SYNONYMS_KEY, &rkv::Value::Blob(raw_bytes))?;
    Ok(())
}
/// Reads the set stored under `SYNONYMS_KEY`, if there is one.
///
/// Returns `Ok(None)` when nothing is stored under the key.
///
/// # Errors
///
/// Returns an error when the store lookup fails or when the stored bytes
/// cannot be turned into an `fst::raw::Fst`.
///
/// # Panics
///
/// Panics with `invalid type …` when the stored value is not a blob.
pub fn synonyms_fst(
    &self,
    reader: &impl rkv::Readable,
) -> MResult<Option<fst::Set>>
{
    // Sort out the "absent" and "wrong type" cases first, keeping only the blob bytes.
    let blob = match self.main.get(reader, SYNONYMS_KEY)? {
        None => return Ok(None),
        Some(Value::Blob(bytes)) => bytes,
        Some(value) => panic!("invalid type {:?}", value),
    };

    // The length is taken before the bytes are moved into the shared buffer.
    let len = blob.len();
    let shared = Arc::from(blob);
    let raw = fst::raw::Fst::from_shared_bytes(shared, 0, len)?;

    Ok(Some(fst::Set::from(raw)))
}
pub fn put_number_of_documents<F: Fn(u64) -> u64>(
&self,
writer: &mut rkv::Writer,

View File

@ -1,23 +1,38 @@
use std::sync::Arc;
use crate::error::MResult;
/// Handle over the store that keeps, for a word, the set of its synonyms.
///
/// The handle is `Copy`, so it can be passed around by value.
#[derive(Copy, Clone)]
pub struct Synonyms {
/// Underlying single store; its methods read and write blob values keyed by word bytes.
pub(crate) synonyms: rkv::SingleStore,
}
impl Synonyms {
// NOTE(review): this hunk is rendered without +/- markers; the `synonyms_fst`
// header, its `reader` parameter, its `Result<fst::Set, _>` return type and the
// `Ok(fst::Set::default())` body line presumably belong to the removed stub — confirm.
pub fn synonyms_fst(
/// Writes the bytes of the `synonyms` set as a blob under the key `word`.
///
/// The `rkv::StoreError` of the underlying `put`, if any, is returned unchanged.
pub fn put_synonyms(
&self,
reader: &impl rkv::Readable,
) -> Result<fst::Set, rkv::StoreError>
writer: &mut rkv::Writer,
word: &[u8],
synonyms: &fst::Set,
) -> Result<(), rkv::StoreError>
{
Ok(fst::Set::default())
// The set is stored as the raw bytes of its underlying fst.
let blob = rkv::Value::Blob(synonyms.as_fst().as_bytes());
self.synonyms.put(writer, word, &blob)
}
// NOTE(review): this hunk is rendered without +/- markers; the `alternatives_to`
// header, the `Result<Option<fst::Set>, rkv::StoreError>` return type and the
// `unimplemented!()` line presumably belong to the removed stub — confirm.
pub fn alternatives_to(
/// Reads the set stored under the key `word`, if there is one.
///
/// Returns `Ok(None)` when the store has no entry for `word`. Lookup failures
/// and failures to build the fst from the stored bytes are returned as errors.
///
/// # Panics
///
/// Panics with `invalid type …` when the stored value is not a blob.
pub fn synonyms(
&self,
reader: &impl rkv::Readable,
word: &[u8],
) -> Result<Option<fst::Set>, rkv::StoreError>
) -> MResult<Option<fst::Set>>
{
unimplemented!()
match self.synonyms.get(reader, word)? {
Some(rkv::Value::Blob(bytes)) => {
// The length is taken before the bytes are moved into the shared buffer.
let len = bytes.len();
let bytes = Arc::from(bytes);
let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len)?;
Ok(Some(fst::Set::from(fst)))
},
// Any other stored value type is treated as a broken invariant.
Some(value) => panic!("invalid type {:?}", value),
None => Ok(None),
}
}
}