From 22c26a5dd0709b308f2c688a1e1d4c1d4e0a946e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Sun, 13 May 2018 15:12:15 +0200 Subject: [PATCH] feat: Make the parsing more generic over json --- raptor-bin/Cargo.lock | 555 +--------------------------------------- raptor-bin/src/main.rs | 69 ++--- raptor-http/src/main.rs | 4 +- src/lib.rs | 12 + 4 files changed, 57 insertions(+), 583 deletions(-) diff --git a/raptor-bin/Cargo.lock b/raptor-bin/Cargo.lock index 442b5d500..d9ed788a9 100644 --- a/raptor-bin/Cargo.lock +++ b/raptor-bin/Cargo.lock @@ -1,11 +1,3 @@ -[[package]] -name = "arrayvec" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "bincode" version = "1.0.0" @@ -15,153 +7,36 @@ dependencies = [ "serde 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "bitflags" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "byteorder" version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "bytes" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "cfg-if" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "crossbeam-deque" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-epoch 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "crossbeam-utils" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "dtoa" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "env_logger" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "fst" version = "0.3.0" +source = "git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state#6e0ab4e4ee5443cc55079996bf9f703086322c33" dependencies = [ "byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "fuchsia-zircon" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "fuchsia-zircon-sys" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "futures" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "httparse" -version = "1.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "idna" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "iovec" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "itoa" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "lazy_static" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "lazycell" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "levenshtein_automata" version = "0.1.0" +source = "git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst#5e8183a7634c4a0182ea7bb398140b2fe9854f77" dependencies = [ - "fst 0.3.0", + "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", ] [[package]] @@ -169,27 +44,6 @@ name = "libc" version = "0.2.40" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "log" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "log" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "matches" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "memmap" version = "0.6.2" @@ -199,68 +53,6 @@ dependencies = [ "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memoffset" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "mio" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "miow" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "net2" -version = "0.2.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "nodrop" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "num_cpus" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "percent-encoding" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "proc-macro2" version = "0.3.8" @@ -277,41 +69,15 @@ dependencies = [ "proc-macro2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rand" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "raptor" version = "0.1.0" dependencies = [ "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "fst 0.3.0", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "levenshtein_automata 0.1.0", + "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)", + "levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)", "serde 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)", - "tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -324,21 +90,6 @@ dependencies = [ "serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "redox_syscall" -version = "0.1.37" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "scoped-tls" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "scopeguard" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "serde" version = "1.0.54" @@ -364,21 +115,6 @@ dependencies = [ "serde 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "slab" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "slab" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "smallvec" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "syn" version = "0.13.9" @@ -389,220 +125,11 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "take" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "time" -version = "0.1.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-fs 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-threadpool 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-timer 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-udp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-core" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-timer 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-executor" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-fs" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-threadpool 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-io" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-minihttp" -version = "0.1.0" -source = "git+https://github.com/Kerollmops/tokio-minihttp.git#1e3f5655e4e64171b87e80d5f872db1345eab54a" -dependencies = [ - "bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-proto" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", - "net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-reactor" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-service" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-tcp" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-threadpool" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-timer" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "tokio-udp" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)", - "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "tokio-reactor 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "unicode-normalization" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "unicode-xid" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "url" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "idna 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", - "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "winapi" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "winapi" version = "0.3.4" @@ -612,11 +139,6 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "winapi-build" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" @@ -627,85 +149,22 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "ws2_32-sys" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [metadata] -"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" "checksum bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bda13183df33055cbb84b847becce220d392df502ebe7a4a78d7021771ed94d0" -"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789" "checksum byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "73b5bdfe7ee3ad0b99c9801d58807a9dbc9e09196365b0203853b99889ab3c87" -"checksum bytes 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "2f1d50c876fb7545f5f289cd8b2aee3f359d073ae819eed5d6373638e2c61e59" -"checksum cfg-if 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "405216fd8fe65f718daa7102ea808a946b6ce40c742998fbfd3463645552de18" -"checksum crossbeam-deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe8153ef04a7594ded05b427ffad46ddeaf22e63fd48d42b3e1e3bb4db07cae7" -"checksum crossbeam-epoch 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9b4e2817eb773f770dcb294127c011e22771899c21d18fce7dd739c0b9832e81" -"checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b" "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" -"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" -"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" -"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" -"checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c" -"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37" -"checksum idna 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "014b298351066f1512874135335d62a789ffe78a9974f94b43ed5621951eaf7d" -"checksum iovec 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "dbe6e417e7d0975db6512b90796e8ce223145ac4e33c377e4a42882a0e88bb08" +"checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)" = "" "checksum itoa 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c069bbec61e1ca5a596166e55dfe4773ff745c3d16b700013bcaff9a6df2c682" -"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d" -"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef" +"checksum levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "" "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b" -"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" -"checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2" -"checksum matches 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "100aabe6b8ff4e4a7e32c1c13523379802df0772b82466207ac25b013f193376" "checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" -"checksum memoffset 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0f9dc261e2b62d7a622bf416ea3c5245cdd5d9a7fcc428c0d06804dfce1775b3" -"checksum mio 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)" = "6d771e3ef92d58a8da8df7d6976bfca9371ed1de6619d9d5a5ce5b1f29b85bfe" -"checksum miow 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919" -"checksum net2 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)" = "9044faf1413a1057267be51b5afba8eb1090bd2231c693664aa1db716fe1eae0" -"checksum nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "9a2228dca57108069a5262f2ed8bd2e82496d2e074a06d1ccc7ce1687b6ae0a2" -"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30" -"checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" "checksum proc-macro2 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "1b06e2f335f48d24442b35a19df506a835fb3547bc3c06ef27340da9acf5cae7" "checksum quote 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9949cfe66888ffe1d53e6ec9d9f3b70714083854be20fd5e271b232a017401e8" -"checksum rand 0.3.22 (registry+https://github.com/rust-lang/crates.io-index)" = "15a732abf9d20f0ad8eeb6f909bf6868722d9a06e1e50802b6a70351f40b4eb1" -"checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5" -"checksum redox_syscall 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "0d92eecebad22b767915e4d529f89f28ee96dbbf5a4810d2b844373f136417fd" -"checksum scoped-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "332ffa32bf586782a3efaeb58f127980944bbc8c4d6913a86107ac2a5ab24b28" -"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27" "checksum serde 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)" = "db9c1726bdebaed7ac8afb7028672e068e12cf1b0b97cddd742a3a7939159699" "checksum serde_derive 1.0.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5121751b76f5a2e6f51b4c0d07976f4f04e33ae7a981467c2845e7cd4b67a114" "checksum serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "f3ad6d546e765177cf3dded3c2e424a8040f870083a0e64064746b958ece9cb1" -"checksum slab 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17b4fcaed89ab08ef143da37bc52adbcc04d4a69014f4c1208d6b51f0c47bc23" -"checksum slab 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fdeff4cd9ecff59ec7e3744cbca73dfe5ac35c2aedb2cfba8a1c715a18912e9d" -"checksum smallvec 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c8cbcd6df1e117c2210e13ab5109635ad68a929fcbb8964dc965b76cb5ee013" "checksum syn 0.13.9 (registry+https://github.com/rust-lang/crates.io-index)" = "505550dded6ff93eb63bd9d0ada380ffccd9f51c046a5e80a3078d53fcef0038" -"checksum take 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b157868d8ac1f56b64604539990685fa7611d8fa9e5476cf0c02cf34d32917c5" -"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" -"checksum tokio 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d00555353b013e170ed8bc4e13f648a317d1fd12157dbcae13f7013f6cf29f5" -"checksum tokio-core 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "aeeffbbb94209023feaef3c196a41cbcdafa06b4a6f893f68779bb5e53796f71" -"checksum tokio-executor 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8cac2a7883ff3567e9d66bb09100d09b33d90311feca0206c7ca034bc0c55113" -"checksum tokio-fs 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "76766830bbf9a2d5bfb50c95350d56a2e79e2c80f675967fff448bc615899708" -"checksum tokio-io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "6af9eb326f64b2d6b68438e1953341e00ab3cf54de7e35d92bfc73af8555313a" -"checksum tokio-minihttp 0.1.0 (git+https://github.com/Kerollmops/tokio-minihttp.git)" = "" -"checksum tokio-proto 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fbb47ae81353c63c487030659494b295f6cb6576242f907f203473b191b0389" -"checksum tokio-reactor 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3cedc8e5af5131dc3423ffa4f877cce78ad25259a9a62de0613735a13ebc64b" -"checksum tokio-service 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "24da22d077e0f15f55162bdbdc661228c1581892f52074fb242678d015b45162" -"checksum tokio-tcp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ec9b094851aadd2caf83ba3ad8e8c4ce65a42104f7b94d9e6550023f0407853f" -"checksum tokio-threadpool 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "5783254b10c7c84a56f62c74766ef7e5b83d1f13053218c7cab8d3f2c826fa0e" -"checksum tokio-timer 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535fed0ccee189f3d48447587697ba3fd234b3dbbb091f0ec4613ddfec0a7c4c" -"checksum tokio-udp 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "137bda266504893ac4774e0ec4c2108f7ccdbcb7ac8dced6305fe9e4e0b5041a" -"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" -"checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" -"checksum url 1.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f808aadd8cfec6ef90e4a14eb46f24511824d1ac596b9682703c87056c8678b7" -"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "04e3bd221fcbe8a271359c04f21a76db7d0c6028862d1bb5512d85e1e2eb5bb3" -"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -"checksum ws2_32-sys 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d59cefebd0c892fa2dd6de581e937301d8552cb44489cdff035c6187cb63fa5e" diff --git a/raptor-bin/src/main.rs b/raptor-bin/src/main.rs index 0324d0b02..dda7d808d 100644 --- a/raptor-bin/src/main.rs +++ b/raptor-bin/src/main.rs @@ -3,59 +3,62 @@ extern crate raptor; extern crate serde_json; -#[macro_use] extern crate serde_derive; use std::collections::HashSet; use std::fs::File; use std::io::{BufReader, BufRead}; +use std::iter; +use raptor::{MapBuilder, Map, Value, AttrIndex}; use serde_json::from_str; -use raptor::{MapBuilder, Map}; - -#[derive(Debug, Deserialize)] -struct Product { - product_id: u64, - title: String, - ft: String, -} - fn main() { let data = File::open("products.json_lines").unwrap(); let data = BufReader::new(data); let common_words = { - // TODO don't break if doesn't exist - let file = File::open("fr.stopwords.txt").unwrap(); - let file = BufReader::new(file); - let mut set = HashSet::new(); - - for line in file.lines() { - let words = line.unwrap(); - for word in words.split_whitespace() { - set.insert(word.to_owned()); - } + match File::open("fr.stopwords.txt") { + Ok(file) => { + let file = BufReader::new(file); + let mut set = HashSet::new(); + for line in file.lines().filter_map(|l| l.ok()) { + for word in line.split_whitespace() { + set.insert(word.to_owned()); + } + } + set + }, + Err(e) => { + eprintln!("{:?}", e); + HashSet::new() + }, } - - set }; let mut builder = MapBuilder::new(); for line in data.lines() { let line = line.unwrap(); - // TODO if possible remove String allocation of Product here... - let product: Product = from_str(&line).unwrap(); + let product: serde_json::Value = from_str(&line).unwrap(); - let title = product.title.split_whitespace(); - let description = product.ft.split_whitespace().filter(|&s| s != "Description"); - let words = title.chain(description) - .filter(|&s| s.chars().any(|c| c.is_alphabetic())) // remove that ? - .map(|s| s.trim_matches(|c: char| !c.is_alphabetic()).to_lowercase()) - .filter(|s| !common_words.contains(s)); + // TODO use a real tokenizer + let title = iter::repeat(0).zip(product["title"].as_str().expect("invalid `title`").split_whitespace()) + .filter(|(_, s)| !common_words.contains(*s)) + .enumerate(); + let description = iter::repeat(1).zip(product["ft"].as_str().expect("invalid `ft`").split_whitespace()) + .filter(|(_, s)| !common_words.contains(*s)) + .enumerate(); - for word in words { - builder.insert(word, product.product_id); + let words = title.chain(description); + for (i, (attr, word)) in words { + let value = Value { + id: product["product_id"].as_u64().expect("invalid `product_id`"), + attr_index: AttrIndex { + attribute: attr, + index: i as u64, + }, + }; + builder.insert(word, value); } } @@ -64,5 +67,5 @@ fn main() { let (map, values) = builder.build(map, values).unwrap(); eprintln!("Checking the dump consistency..."); - unsafe { Map::::from_paths("map.fst", "values.vecs").unwrap() }; + unsafe { Map::::from_paths("map.fst", "values.vecs").unwrap() }; } diff --git a/raptor-http/src/main.rs b/raptor-http/src/main.rs index 4db122789..897f7043b 100644 --- a/raptor-http/src/main.rs +++ b/raptor-http/src/main.rs @@ -16,10 +16,10 @@ use tokio_minihttp::{Request, Response, Http}; use tokio_proto::TcpServer; use tokio_service::Service; -use raptor::{Map, OpWithStateBuilder, LevBuilder}; +use raptor::{Map, OpWithStateBuilder, LevBuilder, Value}; struct MainService { - map: Arc>, + map: Arc>, lev_builder: Arc, } diff --git a/src/lib.rs b/src/lib.rs index cfcaba1cc..dde6a0ab1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,3 +14,15 @@ pub use self::map::{ }; pub use self::levenshtein::LevBuilder; + +#[derive(Debug, Serialize, Deserialize)] +pub struct Value { + pub id: u64, + pub attr_index: AttrIndex, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct AttrIndex { + pub attribute: u8, + pub index: u64, +}