mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 18:45:06 +08:00
feat: Introduce the synonyms concept to the Store trait
This commit is contained in:
parent
e8b2e86007
commit
18736bdcd0
@ -349,7 +349,7 @@ mod tests {
|
|||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use fst::Set;
|
use fst::{Set, IntoStreamer};
|
||||||
|
|
||||||
use crate::DocIndex;
|
use crate::DocIndex;
|
||||||
use crate::store::Store;
|
use crate::store::Store;
|
||||||
@ -357,18 +357,46 @@ mod tests {
|
|||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct InMemorySetStore {
|
struct InMemorySetStore {
|
||||||
set: Set,
|
set: Set,
|
||||||
|
synonyms: Set,
|
||||||
indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>,
|
indexes: HashMap<Vec<u8>, SetBuf<DocIndex>>,
|
||||||
|
alternatives: HashMap<Vec<u8>, Set>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Store for InMemorySetStore {
|
/// Test helper (new side of the diff): collects every key produced by
/// `stream` into an in-memory `fst::SetBuilder` and returns the resulting `Set`.
///
/// Panics (via `unwrap`) if finishing the builder or decoding the bytes fails.
fn set_from_stream<'f, I, S>(stream: I) -> Set
|
||||||
type Error = std::io::Error;
|
where
|
||||||
|
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=&'a [u8]>,
|
||||||
fn words(&self) -> Result<&Set, Self::Error> {
|
S: 'f + for<'a> fst::Streamer<'a, Item=&'a [u8]>,
|
||||||
Ok(&self.set)
|
{
|
||||||
|
let mut builder = fst::SetBuilder::memory();
|
||||||
|
// NOTE(review): the value returned by `extend_stream` is discarded here; if it
// is a `Result` (as in the fst builder API -- confirm), a failed insertion
// would go unnoticed and the returned set would silently be incomplete.
builder.extend_stream(stream);
|
||||||
|
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
/// Test helper (new side of the diff): returns a new `Set` holding every key
/// of `set` plus `key`. The input set is not modified.
fn insert_key(set: &Set, key: &[u8]) -> Set {
|
||||||
Ok(self.indexes.get(word).cloned())
|
// Build a one-element set containing only `key` so it can be unioned below.
let unique_key = {
|
||||||
|
let mut builder = fst::SetBuilder::memory();
|
||||||
|
// NOTE(review): the value returned by `insert` is discarded; confirm whether
// it is a `Result` that should be checked.
builder.insert(key);
|
||||||
|
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Union of the existing keys and the single new key.
let union_ = set.op().add(unique_key.into_stream()).r#union();
|
||||||
|
|
||||||
|
// Materialize the union stream into an owned set.
set_from_stream(union_)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test helper: converts an `sdset::Set<&str>` into an `fst` `Set` by feeding
/// its elements to an in-memory `fst::SetBuilder`.
///
/// Panics (via `unwrap`) if finishing the builder or decoding the bytes fails.
fn sdset_into_fstset(set: &sdset::Set<&str>) -> Set {
|
||||||
|
let mut builder = fst::SetBuilder::memory();
|
||||||
|
// NOTE(review): the value returned by `extend_iter` is discarded; if it is a
// `Result` (as in the fst builder API -- confirm), insertion errors are ignored.
builder.extend_iter(set.into_iter());
|
||||||
|
builder.into_inner().and_then(Set::from_bytes).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inherent helpers of the in-memory test store (new side of the diff).
impl InMemorySetStore {
|
||||||
|
/// Registers `new` as alternatives of `word`.
///
/// The alternatives already stored for `word` (an empty default entry is
/// created when there are none) are replaced by their union with `new`,
/// and `word` itself is added to the `synonyms` set.
pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) {
|
||||||
|
let alternatives = self.alternatives.entry(word.as_bytes().to_vec()).or_default();
|
||||||
|
let new = sdset_into_fstset(&new);
|
||||||
|
// Merge the previously stored alternatives with the new ones.
*alternatives = set_from_stream(alternatives.op().add(new.into_stream()).r#union());
|
||||||
|
|
||||||
|
// Record `word` as a key that has alternatives.
self.synonyms = insert_key(&self.synonyms, word.as_bytes());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -384,11 +412,33 @@ mod tests {
|
|||||||
|
|
||||||
InMemorySetStore {
|
InMemorySetStore {
|
||||||
set: Set::from_iter(tree).unwrap(),
|
set: Set::from_iter(tree).unwrap(),
|
||||||
|
synonyms: Set::default(),
|
||||||
indexes: map,
|
indexes: map,
|
||||||
|
alternatives: HashMap::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// `Store` implementation backed by the in-memory fields of the test store.
// Every method here returns `Ok`; none of them produces an error.
impl Store for InMemorySetStore {
|
||||||
|
type Error = std::io::Error;
|
||||||
|
|
||||||
|
/// Returns a reference to the stored `set` field.
fn words(&self) -> Result<&Set, Self::Error> {
|
||||||
|
Ok(&self.set)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a clone of the indexes stored under `word`, or `None` when the
/// `indexes` map has no entry for it.
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||||
|
Ok(self.indexes.get(word).cloned())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the stored `synonyms` field.
fn synonyms(&self) -> Result<&Set, Self::Error> {
|
||||||
|
Ok(&self.synonyms)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an owned copy of the alternatives stored under `word`, or `None`
/// when the `alternatives` map has no entry for it.
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
|
||||||
|
// The stored set is duplicated by copying its underlying bytes and
// decoding them again; `unwrap` panics if that decoding fails.
Ok(self.alternatives.get(word).map(|s| Set::from_bytes(s.as_fst().to_vec()).unwrap()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const fn doc_index(document_id: u64, word_index: u16) -> DocIndex {
|
const fn doc_index(document_id: u64, word_index: u16) -> DocIndex {
|
||||||
DocIndex {
|
DocIndex {
|
||||||
document_id: DocumentId(document_id),
|
document_id: DocumentId(document_id),
|
||||||
|
@ -8,6 +8,9 @@ pub trait Store {
|
|||||||
|
|
||||||
fn words(&self) -> Result<&Set, Self::Error>;
|
fn words(&self) -> Result<&Set, Self::Error>;
|
||||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
|
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error>;
|
||||||
|
|
||||||
|
/// Returns the store's synonyms set.
/// NOTE(review): in the in-memory test store shown in this diff, this set holds
/// the words that have alternatives registered -- confirm for other stores.
fn synonyms(&self) -> Result<&Set, Self::Error>;
|
||||||
|
/// Returns the set of alternatives associated with `word`, if any.
/// NOTE(review): `None` presumably means that `word` has no alternatives; the
/// in-memory test store behaves that way -- confirm for other stores.
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> Store for &'_ T where T: Store {
|
impl<T> Store for &'_ T where T: Store {
|
||||||
@ -20,4 +23,12 @@ impl<T> Store for &'_ T where T: Store {
|
|||||||
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
fn word_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, Self::Error> {
|
||||||
(*self).word_indexes(word)
|
(*self).word_indexes(word)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards to the `synonyms` implementation of the referenced store.
fn synonyms(&self) -> Result<&Set, Self::Error> {
|
||||||
|
(*self).synonyms()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards to the `alternatives_to` implementation of the referenced store.
fn alternatives_to(&self, word: &[u8]) -> Result<Option<Set>, Self::Error> {
|
||||||
|
(*self).alternatives_to(word)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user