map: Allow using the Levenshtein algorithm to search

This commit is contained in:
Kerollmops 2018-04-22 20:06:56 +02:00
parent 0581b296bb
commit 96d2fbcd3d
No known key found for this signature in database
GPG Key ID: 016ACC0DC3ADC318
4 changed files with 78 additions and 6 deletions

17
Cargo.lock generated
View File

@ -99,6 +99,15 @@ dependencies = [
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "fst-levenshtein"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "fuchsia-zircon" name = "fuchsia-zircon"
version = "0.3.3" version = "0.3.3"
@ -306,6 +315,7 @@ dependencies = [
"bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fst-levenshtein 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
@ -598,6 +608,11 @@ dependencies = [
"percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "utf8-ranges"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "winapi" name = "winapi"
version = "0.2.8" version = "0.2.8"
@ -650,6 +665,7 @@ dependencies = [
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d94485a00b1827b861dd9d1a2cc9764f9044d4c535514c0760a5a2012ef3399f" "checksum fst 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d94485a00b1827b861dd9d1a2cc9764f9044d4c535514c0760a5a2012ef3399f"
"checksum fst-levenshtein 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "64f12af1569dd78afbefe476034bbdce0372d18e9dc75b634bde0e7b8bf994c8"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c" "checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c"
@ -706,6 +722,7 @@ dependencies = [
"checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f" "checksum unicode-normalization 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "51ccda9ef9efa3f7ef5d91e8f9b83bbe6955f9bf86aec89d5cce2c874625920f"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7" "checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3" "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"

View File

@ -7,12 +7,13 @@ authors = ["Kerollmops <renault.cle@gmail.com>"]
bincode = "1.0" bincode = "1.0"
env_logger = { version = "0.3", default-features = false } env_logger = { version = "0.3", default-features = false }
fst = "0.3" fst = "0.3"
fst-levenshtein = "0.2"
futures = "0.1" futures = "0.1"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
smallvec = { version = "0.6", features = ["serde"] }
tokio-minihttp = { git = "https://github.com/tokio-rs/tokio-minihttp.git" } tokio-minihttp = { git = "https://github.com/tokio-rs/tokio-minihttp.git" }
tokio-proto = "0.1" tokio-proto = "0.1"
tokio-service = "0.1" tokio-service = "0.1"
serde = "1.0"
serde_json = "1.0"
serde_derive = "1.0"
smallvec = { version = "0.6", features = ["serde"] }
url = "1.7" url = "1.7"

View File

@ -1,4 +1,6 @@
extern crate env_logger; extern crate env_logger;
extern crate fst;
extern crate fst_levenshtein;
extern crate futures; extern crate futures;
extern crate raptor; extern crate raptor;
extern crate tokio_minihttp; extern crate tokio_minihttp;
@ -8,6 +10,8 @@ extern crate url;
use std::io; use std::io;
use fst_levenshtein::Levenshtein;
use fst::{IntoStreamer, Streamer};
use futures::future; use futures::future;
use tokio_minihttp::{Request, Response, Http}; use tokio_minihttp::{Request, Response, Http};
use tokio_proto::TcpServer; use tokio_proto::TcpServer;
@ -34,8 +38,18 @@ impl Service for MainService {
if let Some((_, key)) = url.query_pairs().find(|&(ref k, _)| k == "q") { if let Some((_, key)) = url.query_pairs().find(|&(ref k, _)| k == "q") {
let key = key.to_lowercase(); let key = key.to_lowercase();
let values = self.map.get(&key).map(|a| &a[..10]);
resp.body(&format!("{:?}", values)); let lev = Levenshtein::new(&key, 2).unwrap();
let mut body = String::new();
let mut stream = self.map.search(lev).into_stream();
while let Some((key, values)) = stream.next() {
let values = &values[..values.len().min(10)];
body.push_str(&format!("{:?} {:?}\n", key, values));
}
resp.body(&body);
} }
future::ok(resp) future::ok(resp)

View File

@ -5,6 +5,7 @@ extern crate serde_json;
#[macro_use] extern crate serde_derive; #[macro_use] extern crate serde_derive;
extern crate smallvec; extern crate smallvec;
use std::ops::{Deref, DerefMut};
use std::io::Write; use std::io::Write;
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
@ -50,6 +51,45 @@ impl MultiMap {
pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[u64]> { pub fn get<K: AsRef<[u8]>>(&self, key: K) -> Option<&[u64]> {
self.map.get(key).map(|i| &*self.values[i as usize]) self.map.get(key).map(|i| &*self.values[i as usize])
} }
/// Starts a search over this map using the automaton `aut`.
///
/// The automaton is handed to the underlying fst map's `search`, and the
/// resulting builder is bundled with a borrow of this map's value lists so
/// that the final stream has access to them.
///
/// Nothing is traversed here; call `into_stream` on the returned builder
/// to obtain the actual stream.
pub fn search<A: fst::Automaton>(&self, aut: A) -> StreamBuilder<A> {
    let inner = self.map.search(aut);
    StreamBuilder { inner, values: &self.values }
}
}
/// Search builder handed out by `MultiMap::search`.
///
/// Pairs the builder produced by the underlying fst map with a borrow of
/// the value lists of the map being searched; both are passed on to the
/// `Stream` created by `into_stream`.
pub struct StreamBuilder<'a, A: fst::Automaton> {
// Builder returned by the underlying fst map's `search`.
inner: fst::map::StreamBuilder<'a, A>,
// Borrowed value lists of the searched map.
values: &'a [SmallVec32<u64>],
}
/// Gives shared access to the wrapped fst stream builder.
///
/// NOTE(review): the fst builder's range methods (`ge`, `gt`, `le`, `lt`)
/// appear to take `self` by value, so they may not be usable through this
/// deref — confirm against the fst version in use.
impl<'a, A: fst::Automaton> Deref for StreamBuilder<'a, A> {
    type Target = fst::map::StreamBuilder<'a, A>;

    fn deref(&self) -> &fst::map::StreamBuilder<'a, A> {
        let wrapped = &self.inner;
        wrapped
    }
}
/// Gives exclusive access to the wrapped fst stream builder.
impl<'a, A: fst::Automaton> DerefMut for StreamBuilder<'a, A> {
    fn deref_mut(&mut self) -> &mut fst::map::StreamBuilder<'a, A> {
        let wrapped = &mut self.inner;
        wrapped
    }
}
/// Turns the search builder into the stream that yields the results.
///
/// Each item is declared as a key (`&str`) paired with a slice of `u64`
/// values, both borrowed for `'a`.
impl<'a, A: fst::Automaton> fst::IntoStreamer<'a> for StreamBuilder<'a, A> {
type Item = (&'a str, &'a [u64]);
type Into = Stream<'a, A>;
/// Consumes the builder: converts the wrapped fst builder into its stream
/// and forwards the borrowed value lists unchanged to the new `Stream`.
fn into_stream(self) -> Self::Into {
Stream {
inner: self.inner.into_stream(),
values: self.values,
}
}
} }
pub struct Stream<'a, A: fst::Automaton = fst::automaton::AlwaysMatch> { pub struct Stream<'a, A: fst::Automaton = fst::automaton::AlwaysMatch> {