diff --git a/.gitignore b/.gitignore index 70e3cae73..e66eef3d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ - +/rocksdb /target **/*.rs.bk +**/*.idx +**/*.map +**/*.sst diff --git a/Cargo.lock b/Cargo.lock index c5a188efd..73adf77ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,3 +1,11 @@ +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "arrayref" version = "0.3.5" @@ -11,6 +19,16 @@ dependencies = [ "nodrop 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "atty" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "base64" version = "0.5.2" @@ -94,6 +112,20 @@ name = "cfg-if" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "clap" +version = "2.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cloudabi" version = "0.0.3" @@ -674,6 +706,7 @@ dependencies = [ "serde 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)", + "structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "warp 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -705,6 +738,14 @@ name = "redox_syscall" version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rocksdb" version = "0.3.0" @@ -846,6 +887,30 @@ name = "string" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "strsim" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "structopt" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", + "structopt-derive 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "structopt-derive" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "syn" version = "0.14.9" @@ -856,6 +921,24 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "termion" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.43 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "textwrap" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "time" version = "0.1.40" @@ -1074,6 +1157,11 @@ name = "unicode-normalization" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unicode-width" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unicode-xid" version = "0.1.0" @@ -1117,6 +1205,11 @@ name = "vcpkg" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "version_check" version = "0.1.4" @@ -1211,8 +1304,10 @@ dependencies = [ ] [metadata] +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum arrayref 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0d382e583f07208808f6b1249e60848879ba3543f57c32277bf52d69c2f0f0ee" "checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" +"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum base64 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "30e93c03064e7590d0466209155251b90c22e37fab1daf2771582598b5827557" "checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9" "checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12" @@ -1225,6 +1320,7 @@ dependencies = [ "checksum bzip2-sys 0.1.6 (git+https://github.com/alexcrichton/bzip2-rs.git)" = "" "checksum cc 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "4a6007c146fdd28d4512a794b07ffe9d8e89e6bf86e2e0c4ddff2e1fb54a0007" "checksum cfg-if 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4e7bb64a8ebb0d856483e1e682ea3422f883c5f5615a90d51a2c82fe87fdd3" +"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" "checksum cmake 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "704fbf3bb5149daab0afb255dbea24a1f08d2f4099cedb9baab6d470d4c5eefb" "checksum crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d663548de7f5cca343f1e0a48d14dcfb0e9eb4e079ec58883b7251539fa10aeb" @@ -1292,6 +1388,7 @@ dependencies = [ "checksum rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e464cd887e869cddcae8792a4ee31d23c7edd516700695608f5b98c67ee0131c" "checksum rand_core 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "edecf0f94da5551fc9b492093e30b041a891657db7940ee221f9d2f66e82eef2" "checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" "checksum rocksdb 0.3.0 (git+https://github.com/pingcap/rust-rocksdb.git)" = "" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e7c066b8e2923f05d4718a06d2622f189ff362bc642bfade6c6629f0440f3827" @@ -1312,7 +1409,12 @@ dependencies = [ "checksum snappy-sys 0.1.0 (git+https://github.com/busyjay/rust-snappy.git?branch=static-link)" = "" "checksum stable_deref_trait 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dba1a27d3efae4351c8051072d619e3ade2820635c3958d826bfea39d59b54c8" "checksum string 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00caf261d6f90f588f8450b8e1230fa0d5be49ee6140fdfbcb55335aff350970" +"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550" +"checksum structopt 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8e9ad6a11096cbecdcca0cc6aa403fdfdbaeda2fb3323a39c98e6a166a1e45a" +"checksum structopt-derive 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4cbce8ccdc62166bd594c14396a3242bf94c337a51dbfa9be1076dd74b3db2af" "checksum syn 0.14.9 (registry+https://github.com/rust-lang/crates.io-index)" = "261ae9ecaa397c42b960649561949d69311f08eeaea86a65696e6e46517cf741" +"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096" +"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b" "checksum tokio 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "fbb6a6e9db2702097bfdfddcb09841211ad423b86c75b5ddaca1d62842ac492c" "checksum tokio-codec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "881e9645b81c2ce95fcb799ded2c29ffb9f25ef5bef909089a420e5961dd8ccb" @@ -1333,6 +1435,7 @@ dependencies = [ "checksum unicase 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "284b6d3db520d67fbe88fd778c21510d1b0ba4a551e5d0fbb023d33405f6de8a" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" "checksum unicode-normalization 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "6a0180bc61fc5a987082bfa111f4cc95c4caff7f9799f3e46df09163a937aa25" +"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "402bb19d8e03f1d1a7450e2bd613980869438e0666331be3e073089124aa1adc" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" @@ -1340,6 +1443,7 @@ dependencies = [ "checksum urlencoding 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3df3561629a8bb4c57e5a2e4c43348d9e29c7c29d9b1c4c1f47166deca8f37ed" "checksum utf-8 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bab35f71693630bb1953dce0f2bcd780e7cde025027124a202ac08a45ba25141" "checksum vcpkg 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "def296d3eb3b12371b2c7d0e83bfe1403e4db2d7a0bba324a12b21c4ee13143d" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7716c242968ee87e5542f8021178248f267f295a5c4803beae8b8b7fd9bc6051" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum want 0.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "797464475f30ddb8830cc529aaaae648d581f99e2036a928877dfde027ddf6b3" diff --git a/raptor-http/Cargo.toml b/raptor-http/Cargo.toml index 7f3051e43..b78f6bdb8 100644 --- a/raptor-http/Cargo.toml +++ b/raptor-http/Cargo.toml @@ -10,6 +10,7 @@ authors = ["Clément Renault "] raptor = { path = "../raptor" } serde_derive = "1.0" serde_json = "1.0" +structopt = "0.2" serde = "1.0" warp = "0.1" diff --git a/raptor-http/src/main.rs b/raptor-http/src/main.rs index fd14f5bb7..404a6d48f 100644 --- a/raptor-http/src/main.rs +++ b/raptor-http/src/main.rs @@ -1,8 +1,9 @@ #[macro_use] extern crate serde_derive; -use std::env; use std::fs::File; -use std::path::Path; +use std::net::SocketAddr; +use structopt::StructOpt; +use std::path::{Path, PathBuf}; use std::collections::hash_set::HashSet; use std::io::{self, BufReader, BufRead, Write}; use std::sync::Arc; @@ -13,6 +14,21 @@ use raptor::{automaton, Metadata, RankedStream}; use fst::Streamer; use warp::Filter; +#[derive(Debug, StructOpt)] +#[structopt(name = "raptor-http", about = "Raptor but wrapped by an http server.")] +struct Opt { + /// The addresse and port to bind the server to. + #[structopt(short = "l", default_value = "127.0.0.1:3030")] + listen_addr: SocketAddr, + + /// The stop word file, each word must be separated by a newline. + #[structopt(long = "stop-words", parse(from_os_str))] + stop_words: PathBuf, + + /// Meta file name (e.g. relaxed-colden). + meta_name: String, +} + #[derive(Debug, Deserialize)] struct SearchQuery { query: String } @@ -81,7 +97,9 @@ where M: AsRef, } fn main() { - let name = env::args().nth(1).expect("Missing meta file name (e.g. relaxed-colden)"); + let opt = Opt::from_args(); + + let name = opt.meta_name; let map_file = format!("{}.map", name); let idx_file = format!("{}.idx", name); let sst_file = format!("{}.sst", name); @@ -97,7 +115,7 @@ fn main() { let db = DB::open_for_read_only(DBOptions::default(), rocksdb, false).unwrap(); let db = Arc::new(db); - let common_words = common_words("fr.stopwords.txt").unwrap(); + let common_words = common_words(opt.stop_words).expect("reading stop words"); let routes = warp::path("search") .and(warp::query()) @@ -108,5 +126,5 @@ fn main() { .with(warp::reply::with::header("Content-Type", "application/json")) .with(warp::reply::with::header("Access-Control-Allow-Origin", "*")); - warp::serve(routes).run(([127, 0, 0, 1], 3030)); + warp::serve(routes).run(opt.listen_addr); }