mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 10:07:40 +08:00
feat(search): Accept multiple words and do a simple union
This commit is contained in:
parent
758baeb8e1
commit
1476aa3dba
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -85,7 +85,6 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "fst"
|
||||
version = "0.3.0"
|
||||
source = "git+https://github.com/Kerollmops/fst.git?branch=stream-with-state#a969462433944a22f1356a8bf2affb8e9bde6f67"
|
||||
dependencies = [
|
||||
"byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -161,9 +160,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
[[package]]
|
||||
name = "levenshtein_automata"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst#13a685e087efcf253936342c055166fa5d5c9b9c"
|
||||
dependencies = [
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)",
|
||||
"fst 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -305,9 +303,9 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)",
|
||||
"fst 0.3.0",
|
||||
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)",
|
||||
"levenshtein_automata 0.1.0",
|
||||
"serde 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -651,7 +649,6 @@ dependencies = [
|
||||
"checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b"
|
||||
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)" = "<none>"
|
||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
|
||||
@ -662,7 +659,6 @@ dependencies = [
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d"
|
||||
"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef"
|
||||
"checksum levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "<none>"
|
||||
"checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b"
|
||||
"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
|
||||
"checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2"
|
||||
|
14
Cargo.toml
14
Cargo.toml
@ -16,13 +16,15 @@ tokio-service = "0.1"
|
||||
url = "1.7"
|
||||
|
||||
[dependencies.fst]
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
branch = "stream-with-state"
|
||||
path = "../../fst"
|
||||
# git = "https://github.com/Kerollmops/fst.git"
|
||||
# branch = "stream-with-state"
|
||||
|
||||
[dependencies.levenshtein_automata]
|
||||
git = "https://github.com/Kerollmops/levenshtein-automata.git"
|
||||
branch = "custom-fst"
|
||||
path = "../../levenshtein-automata"
|
||||
# git = "https://github.com/Kerollmops/levenshtein-automata.git"
|
||||
# branch = "custom-fst"
|
||||
features = ["fst_automaton"]
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
# [profile.release]
|
||||
# lto = true
|
||||
|
@ -13,14 +13,14 @@ use std::path::Path;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, BufReader};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use fst::Streamer;
|
||||
use futures::future;
|
||||
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
|
||||
use tokio_minihttp::{Request, Response, Http};
|
||||
use tokio_proto::TcpServer;
|
||||
use tokio_service::Service;
|
||||
|
||||
use raptor::FstMap;
|
||||
use raptor::{FstMap, OpWithStateBuilder};
|
||||
|
||||
static mut MAP: Option<FstMap<u64>> = None;
|
||||
static mut LEV_BUILDER_0: Option<LevBuilder> = None;
|
||||
@ -52,25 +52,40 @@ impl<'a> Service for MainService<'a> {
|
||||
if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
|
||||
let query = query.to_lowercase();
|
||||
|
||||
let lev = if query.len() <= 4 {
|
||||
self.lev_builder_0.build_dfa(&query)
|
||||
} else if query.len() <= 8 {
|
||||
self.lev_builder_1.build_dfa(&query)
|
||||
} else {
|
||||
self.lev_builder_2.build_dfa(&query)
|
||||
};
|
||||
let mut automatons = Vec::new();
|
||||
|
||||
let mut stream = self.map.search(&lev).with_state().into_stream();
|
||||
for query in query.split_whitespace() {
|
||||
let lev = if query.len() <= 4 {
|
||||
self.lev_builder_0.build_dfa(&query)
|
||||
} else if query.len() <= 8 {
|
||||
self.lev_builder_1.build_dfa(&query)
|
||||
} else {
|
||||
self.lev_builder_2.build_dfa(&query)
|
||||
};
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut op = OpWithStateBuilder::new(self.map.values());
|
||||
|
||||
for automaton in automatons.iter().cloned() {
|
||||
let stream = self.map.as_map().search(automaton).with_state();
|
||||
op.push(stream);
|
||||
}
|
||||
|
||||
let mut stream = op.union();
|
||||
|
||||
let mut body = String::new();
|
||||
body.push_str("<html><body>");
|
||||
|
||||
while let Some((key, values, state)) = stream.next() {
|
||||
while let Some((key, ivalues)) = stream.next() {
|
||||
match std::str::from_utf8(key) {
|
||||
Ok(key) => {
|
||||
let values = &values[..values.len().min(10)];
|
||||
let distance = lev.distance(state);
|
||||
body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, values));
|
||||
for ivalue in ivalues {
|
||||
let i = ivalue.index;
|
||||
let state = ivalue.state;
|
||||
let distance = automatons[i].distance(state);
|
||||
body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, ivalue.values));
|
||||
}
|
||||
},
|
||||
Err(e) => eprintln!("{:?}", e),
|
||||
}
|
||||
|
170
src/fst_map.rs
170
src/fst_map.rs
@ -1,5 +1,5 @@
|
||||
use bincode;
|
||||
use fst::{self, Map, MapBuilder, Automaton};
|
||||
use fst::{self, Automaton};
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::ser::Serialize;
|
||||
use std::fs::File;
|
||||
@ -10,7 +10,7 @@ use {StreamBuilder, Stream};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FstMap<T> {
|
||||
inner: Map,
|
||||
inner: fst::Map,
|
||||
values: Values<T>,
|
||||
}
|
||||
|
||||
@ -21,7 +21,7 @@ impl<T> FstMap<T> {
|
||||
P: AsRef<Path>,
|
||||
Q: AsRef<Path>
|
||||
{
|
||||
let inner = Map::from_path(map)?;
|
||||
let inner = fst::Map::from_path(map)?;
|
||||
|
||||
// TODO handle errors !!!
|
||||
let values = File::open(values).unwrap();
|
||||
@ -35,7 +35,7 @@ impl<T> FstMap<T> {
|
||||
where
|
||||
T: DeserializeOwned
|
||||
{
|
||||
let inner = Map::from_bytes(map)?;
|
||||
let inner = fst::Map::from_bytes(map)?;
|
||||
let values = bincode::deserialize(values).unwrap();
|
||||
|
||||
Ok(Self { inner, values })
|
||||
@ -62,6 +62,19 @@ impl<T> FstMap<T> {
|
||||
values: &self.values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn op(&self) -> OpBuilder<T> {
|
||||
// OpBuilder::new(&self.values).add(self.as_inner())
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn as_map(&self) -> &fst::Map {
|
||||
&self.inner
|
||||
}
|
||||
|
||||
pub fn values(&self) -> &Values<T> {
|
||||
&self.values
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@ -137,7 +150,7 @@ impl<T> FstMapBuilder<T> {
|
||||
|
||||
pub fn build_memory(self) -> fst::Result<FstMap<T>> {
|
||||
Ok(FstMap {
|
||||
inner: Map::from_iter(self.map)?,
|
||||
inner: fst::Map::from_iter(self.map)?,
|
||||
values: Values::new(self.values),
|
||||
})
|
||||
}
|
||||
@ -148,7 +161,7 @@ impl<T> FstMapBuilder<T> {
|
||||
W: Write,
|
||||
X: Write
|
||||
{
|
||||
let mut builder = MapBuilder::new(map_wrt)?;
|
||||
let mut builder = fst::MapBuilder::new(map_wrt)?;
|
||||
builder.extend_iter(self.map)?;
|
||||
let map = builder.into_inner()?;
|
||||
let values = Values::new(self.values);
|
||||
@ -159,3 +172,148 @@ impl<T> FstMapBuilder<T> {
|
||||
Ok((map, values_wrt))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OpBuilder<'m, 'v, T: 'v> {
|
||||
inner: fst::map::OpBuilder<'m>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
OpBuilder {
|
||||
inner: fst::map::OpBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> Union<'m, 'v, T> {
|
||||
Union {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Union<'m, 'v, T: 'v> {
|
||||
inner: fst::map::Union<'m>,
|
||||
outs: Vec<IndexedValues<'v, T>>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> {
|
||||
type Item = (&'a [u8], &'a [IndexedValues<'a, T>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
self.outs.push(IndexedValues { index, values })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexedValues<'a, T: 'a> {
|
||||
pub index: usize,
|
||||
pub values: &'a [T],
|
||||
}
|
||||
|
||||
pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> {
|
||||
inner: fst::map::OpWithStateBuilder<'m, U>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> {
|
||||
pub fn new(values: &'v Values<T>) -> Self {
|
||||
Self {
|
||||
inner: fst::map::OpWithStateBuilder::new(),
|
||||
values: values,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add<I, S>(mut self, streamable: I) -> Self
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.push(streamable);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn push<I, S>(&mut self, streamable: I)
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>,
|
||||
S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>,
|
||||
{
|
||||
self.inner.push(streamable);
|
||||
}
|
||||
|
||||
pub fn union(self) -> UnionWithState<'m, 'v, T, U> {
|
||||
UnionWithState {
|
||||
inner: self.inner.union(),
|
||||
outs: Vec::new(),
|
||||
values: self.values,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UnionWithState<'m, 'v, T: 'v, U> {
|
||||
inner: fst::map::UnionWithState<'m, U>,
|
||||
outs: Vec<IndexedValuesWithState<'v, T, U>>,
|
||||
values: &'v Values<T>,
|
||||
}
|
||||
|
||||
impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U>
|
||||
where
|
||||
U: Clone,
|
||||
{
|
||||
type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]);
|
||||
|
||||
fn next(&'a mut self) -> Option<Self::Item> {
|
||||
match self.inner.next() {
|
||||
Some((s, ivalues)) => {
|
||||
self.outs.clear();
|
||||
for ivalue in ivalues {
|
||||
let index = ivalue.index;
|
||||
let values = unsafe { self.values.get_unchecked(ivalue.value as usize) };
|
||||
let state = ivalue.state.clone();
|
||||
self.outs.push(IndexedValuesWithState { index, values, state })
|
||||
}
|
||||
Some((s, &self.outs))
|
||||
},
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexedValuesWithState<'a, T: 'a, U> {
|
||||
pub index: usize,
|
||||
pub values: &'a [T],
|
||||
pub state: U,
|
||||
}
|
||||
|
@ -5,13 +5,13 @@ extern crate serde;
|
||||
|
||||
mod fst_map;
|
||||
|
||||
use std::ops::Range;
|
||||
use std::io::{Write, BufReader};
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use fst::Automaton;
|
||||
|
||||
pub use self::fst_map::{FstMap, FstMapBuilder};
|
||||
pub use self::fst_map::{
|
||||
OpBuilder, IndexedValues,
|
||||
OpWithStateBuilder, IndexedValuesWithState,
|
||||
};
|
||||
use self::fst_map::Values;
|
||||
|
||||
pub struct StreamBuilder<'m, 'v, T: 'v, A> {
|
||||
|
Loading…
Reference in New Issue
Block a user