From 65e32fecb17d63636465fe2f137b16ff6964754d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 19 Oct 2020 13:44:17 +0200 Subject: [PATCH] Move the binaries into one with subcommands --- Cargo.lock | 125 ++++++++++++++++++++++------- Cargo.toml | 3 +- src/lib.rs | 1 + src/main.rs | 28 +++++++ src/{bin => subcommand}/indexer.rs | 24 +++--- src/{bin => subcommand}/infos.rs | 32 +++----- src/subcommand/mod.rs | 4 + src/{bin => subcommand}/search.rs | 13 +-- src/{bin => subcommand}/serve.rs | 26 +++--- 9 files changed, 168 insertions(+), 88 deletions(-) create mode 100644 src/main.rs rename src/{bin => subcommand}/indexer.rs (98%) rename src/{bin => subcommand}/infos.rs (97%) create mode 100644 src/subcommand/mod.rs rename src/{bin => subcommand}/search.rs (88%) rename src/{bin => subcommand}/serve.rs (94%) diff --git a/Cargo.lock b/Cargo.lock index bb5cf7b2e..02aec6df6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -257,6 +257,15 @@ dependencies = [ "bitflags", ] +[[package]] +name = "cloudabi" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4344512281c643ae7638bbabc3af17a11307803ec8f0fcad9fae512a8bf36467" +dependencies = [ + "bitflags", +] + [[package]] name = "const_fn" version = "0.4.2" @@ -637,7 +646,7 @@ dependencies = [ "indexmap", "log 0.4.11", "slab", - "tokio", + "tokio 0.2.21", "tokio-util", ] @@ -793,7 +802,7 @@ dependencies = [ "pin-project", "socket2", "time", - "tokio", + "tokio 0.2.21", "tower-service", "want", ] @@ -828,6 +837,15 @@ dependencies = [ "bytes", ] +[[package]] +name = "instant" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63312a18f7ea8760cdd0a7c5aac1a619752a246b833545e3e36d1f81f7cd9e66" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "iovec" version = "0.1.4" @@ -939,6 +957,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "lock_api" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.3.9" @@ -1020,7 +1047,7 @@ dependencies = [ "stderrlog", "structopt", "tempfile", - "tokio", + "tokio 0.3.0", "warp", ] @@ -1090,26 +1117,16 @@ dependencies = [ ] [[package]] -name = "mio-named-pipes" -version = "0.1.6" +name = "mio" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5e374eff525ce1c5b7687c4cef63943e7686524a387933ad27ca7ec43779cb3" +checksum = "e53a6ea5f38c0a48ca42159868c6d8e1bd56c0451238856cc08d58563643bdc3" dependencies = [ - "log 0.4.11", - "mio", - "miow 0.3.4", - "winapi 0.3.8", -] - -[[package]] -name = "mio-uds" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afcb699eb26d4332647cc848492bbc15eafb26f08d0304550d5aa1f612e066f0" -dependencies = [ - "iovec", "libc", - "mio", + "log 0.4.11", + "miow 0.3.4", + "ntapi", + "winapi 0.3.8", ] [[package]] @@ -1193,6 +1210,15 @@ dependencies = [ "version_check 0.9.2", ] +[[package]] +name = "ntapi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a31937dea023539c72ddae0e3571deadc1414b300483fa7aaec176168cfa9d2" +dependencies = [ + "winapi 0.3.8", +] + [[package]] name = "num-integer" version = "0.1.43" @@ -1256,6 +1282,32 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "parking_lot" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4893845fa2ca272e647da5d0e46660a314ead9c2fdd9a883aabc32e481a8733" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c361aa727dd08437f2f1447be8b59a33b0edd15e0fcee698f935613d9efbca9b" +dependencies = [ + "cfg-if 0.1.10", + "cloudabi 0.1.0", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi 0.3.8", +] + [[package]] name = "percent-encoding" version = "2.1.0" @@ -1539,7 +1591,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" dependencies = [ - "cloudabi", + "cloudabi 0.0.3", "fuchsia-cprng", "libc", "rand_core 0.4.2", @@ -2027,12 +2079,27 @@ dependencies = [ "futures-core", "iovec", "lazy_static", + "memchr", + "mio 0.6.22", + "pin-project-lite", + "slab", +] + +[[package]] +name = "tokio" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7137dbb0abee577362ccdc7df21605cfcbb949243aeab47dac9ea6ef7d830e21" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "lazy_static", "libc", "memchr", - "mio", - "mio-named-pipes", - "mio-uds", + "mio 0.7.3", "num_cpus", + "parking_lot", "pin-project-lite", "signal-hook-registry", "slab", @@ -2042,9 +2109,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "0.2.5" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c3acc6aa564495a0f2e1d59fab677cd7f81a19994cfc7f3ad0e64301560389" +checksum = "d48caa7b66c7a6ec943edf78d21a594fbeb24e536c781da67d5c32edec54103f" dependencies = [ "proc-macro2", "quote", @@ -2060,7 +2127,7 @@ dependencies = [ "futures", "log 0.4.11", "pin-project", - "tokio", + "tokio 0.2.21", "tungstenite", ] @@ -2075,7 +2142,7 @@ dependencies = [ "futures-sink", "log 0.4.11", "pin-project-lite", - "tokio", + "tokio 0.2.21", ] [[package]] @@ -2263,7 +2330,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "tokio", + "tokio 0.2.21", "tokio-tungstenite", "tower-service", "urlencoding", diff --git a/Cargo.toml b/Cargo.toml index e0492e129..6cf5e0a51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ name = "milli" version = "0.1.0" authors = ["Kerollmops "] edition = "2018" -default-run = "indexer" [dependencies] anyhow = "1.0.28" @@ -43,7 +42,7 @@ stderrlog = "0.5.0" askama = "0.10.1" askama_warp = "0.10.0" serde = { version = "1.0", features = ["derive"] } -tokio = { version = "0.2.15", features = ["full"] } +tokio = { version = "0.3.0", features = ["full"] } warp = "0.2.2" [dev-dependencies] diff --git a/src/lib.rs b/src/lib.rs index 4ae60525d..2be4a3b82 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,7 @@ mod search; mod update_store; pub mod heed_codec; pub mod proximity; +pub mod subcommand; pub mod tokenizer; use std::collections::HashMap; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000..436109469 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,28 @@ +use structopt::StructOpt; + +use milli::subcommand::indexer::{self, Opt as IndexerOpt}; +use milli::subcommand::infos::{self, Opt as InfosOpt}; +use milli::subcommand::serve::{self, Opt as ServeOpt}; +use milli::subcommand::search::{self, Opt as SearchOpt}; + +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + +#[derive(Debug, StructOpt)] +#[structopt(name = "milli", about = "The milli project.")] +enum Command { + Serve(ServeOpt), + Indexer(IndexerOpt), + Infos(InfosOpt), + Search(SearchOpt), +} + +fn main() -> anyhow::Result<()> { + match Command::from_args() { + Command::Serve(opt) => serve::run(opt), + Command::Indexer(opt) => indexer::run(opt), + Command::Infos(opt) => infos::run(opt), + Command::Search(opt) => search::run(opt), + } +} diff --git a/src/bin/indexer.rs b/src/subcommand/indexer.rs similarity index 98% rename from src/bin/indexer.rs rename to src/subcommand/indexer.rs index 767f7ca0c..47a429112 100644 --- a/src/bin/indexer.rs +++ b/src/subcommand/indexer.rs @@ -22,9 +22,9 @@ use roaring::RoaringBitmap; use structopt::StructOpt; use tempfile::tempfile; -use milli::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; -use milli::tokenizer::{simple_tokenizer, only_token}; -use milli::{SmallVec32, Index, Position, DocumentId}; +use crate::heed_codec::{CsvStringRecordCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; +use crate::tokenizer::{simple_tokenizer, only_token}; +use crate::{SmallVec32, Index, Position, DocumentId}; const LMDB_MAX_KEY_LENGTH: usize = 511; const ONE_KILOBYTE: usize = 1024 * 1024; @@ -32,18 +32,14 @@ const ONE_KILOBYTE: usize = 1024 * 1024; const MAX_POSITION: usize = 1000; const MAX_ATTRIBUTES: usize = u32::max_value() as usize / MAX_POSITION; -const WORDS_FST_KEY: &[u8] = milli::WORDS_FST_KEY.as_bytes(); -const HEADERS_KEY: &[u8] = milli::HEADERS_KEY.as_bytes(); -const DOCUMENTS_IDS_KEY: &[u8] = milli::DOCUMENTS_IDS_KEY.as_bytes(); - -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes(); +const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes(); +const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes(); #[derive(Debug, StructOpt)] #[structopt(name = "milli-indexer")] /// The indexer binary of the milli project. -struct Opt { +pub struct Opt { /// The database path where the database is located. /// It is created if it doesn't already exist. #[structopt(long = "db", parse(from_os_str))] @@ -191,7 +187,7 @@ fn compute_words_pair_proximities( for ((w1, ps1), (w2, ps2)) in word_positions.iter().cartesian_product(word_positions) { let mut min_prox = None; for (ps1, ps2) in ps1.iter().cartesian_product(ps2) { - let prox = milli::proximity::positions_proximity(*ps1, *ps2); + let prox = crate::proximity::positions_proximity(*ps1, *ps2); let prox = u8::try_from(prox).unwrap(); // We don't care about a word that appear at the // same position or too far from the other. @@ -736,9 +732,7 @@ fn csv_readers( } } -fn main() -> anyhow::Result<()> { - let opt = Opt::from_args(); - +pub fn run(opt: Opt) -> anyhow::Result<()> { stderrlog::new() .verbosity(opt.verbose) .show_level(false) diff --git a/src/bin/infos.rs b/src/subcommand/infos.rs similarity index 97% rename from src/bin/infos.rs rename to src/subcommand/infos.rs index 2f968a8a7..6e17b1f93 100644 --- a/src/bin/infos.rs +++ b/src/subcommand/infos.rs @@ -2,16 +2,12 @@ use std::path::PathBuf; use std::{str, io}; use anyhow::Context; +use crate::Index; use heed::EnvOpenOptions; -use milli::Index; use structopt::StructOpt; use Command::*; -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - const MAIN_DB_NAME: &str = "main"; const WORD_DOCIDS_DB_NAME: &str = "word-docids"; const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; @@ -33,8 +29,8 @@ const POSTINGS_DATABASE_NAMES: &[&str] = &[ ]; #[derive(Debug, StructOpt)] -#[structopt(name = "milli-info", about = "A stats crawler for milli.")] -struct Opt { +/// A stats fetcher for milli. +pub struct Opt { /// The database path where the database is located. /// It is created if it doesn't already exist. #[structopt(long = "db", parse(from_os_str))] @@ -133,8 +129,11 @@ enum Command { }, } -fn main() -> anyhow::Result<()> { - let opt = Opt::from_args(); +pub fn run(opt: Opt) -> anyhow::Result<()> { + let env = EnvOpenOptions::new() + .map_size(opt.database_size) + .max_dbs(10) + .open(&opt.database)?; stderrlog::new() .verbosity(opt.verbose) @@ -142,11 +141,6 @@ fn main() -> anyhow::Result<()> { .timestamp(stderrlog::Timestamp::Off) .init()?; - let env = EnvOpenOptions::new() - .map_size(opt.database_size) - .max_dbs(10) - .open(&opt.database)?; - // Open the LMDB database. let index = Index::new(&env)?; let rtxn = env.read_txn()?; @@ -196,7 +190,7 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho use std::cmp::Reverse; use std::collections::BinaryHeap; use heed::types::{Str, ByteSlice}; - use milli::heed_codec::BEU32StrCodec; + use crate::heed_codec::BEU32StrCodec; let main_name = "main"; let word_docids_name = "word_docids"; @@ -306,7 +300,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow: fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { use heed::types::DecodeIgnore; - use milli::{DocumentId, BEU32StrCodec}; + use crate::{DocumentId, BEU32StrCodec}; let mut words_counts = Vec::new(); let mut count = 0; @@ -345,7 +339,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow:: fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { use heed::types::DecodeIgnore; - use milli::BoRoaringBitmapCodec; + use crate::BoRoaringBitmapCodec; let mut values_length = Vec::new(); let mut count = 0; @@ -397,7 +391,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu use heed::types::ByteSlice; use heed::{Error, BytesDecode}; use roaring::RoaringBitmap; - use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; + use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>( db: heed::PolyDatabase, @@ -478,7 +472,7 @@ fn word_pair_proximities_docids( ) -> anyhow::Result<()> { use heed::types::ByteSlice; - use milli::RoaringBitmapCodec; + use crate::RoaringBitmapCodec; let stdout = io::stdout(); let mut wtr = csv::Writer::from_writer(stdout.lock()); diff --git a/src/subcommand/mod.rs b/src/subcommand/mod.rs new file mode 100644 index 000000000..c7864c565 --- /dev/null +++ b/src/subcommand/mod.rs @@ -0,0 +1,4 @@ +pub mod indexer; +pub mod infos; +pub mod search; +pub mod serve; diff --git a/src/bin/search.rs b/src/subcommand/search.rs similarity index 88% rename from src/bin/search.rs rename to src/subcommand/search.rs index 8c9e9abdb..2d2fc2724 100644 --- a/src/bin/search.rs +++ b/src/subcommand/search.rs @@ -5,16 +5,13 @@ use std::time::Instant; use heed::EnvOpenOptions; use log::debug; -use milli::Index; use structopt::StructOpt; -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +use crate::Index; #[derive(Debug, StructOpt)] -#[structopt(name = "milli-search", about = "A simple search binary for milli project.")] -struct Opt { +/// A simple search helper binary for the milli project. +pub struct Opt { /// The database path where the database is located. /// It is created if it doesn't already exist. #[structopt(long = "db", parse(from_os_str))] @@ -33,9 +30,7 @@ struct Opt { query: Option, } -fn main() -> anyhow::Result<()> { - let opt = Opt::from_args(); - +pub fn run(opt: Opt) -> anyhow::Result<()> { stderrlog::new() .verbosity(opt.verbose) .show_level(false) diff --git a/src/bin/serve.rs b/src/subcommand/serve.rs similarity index 94% rename from src/bin/serve.rs rename to src/subcommand/serve.rs index 082f5fb86..2f6940237 100644 --- a/src/bin/serve.rs +++ b/src/subcommand/serve.rs @@ -11,16 +11,12 @@ use serde::Deserialize; use structopt::StructOpt; use warp::{Filter, http::Response}; -use milli::tokenizer::{simple_tokenizer, TokenType}; -use milli::{Index, SearchResult}; - -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +use crate::tokenizer::{simple_tokenizer, TokenType}; +use crate::{Index, SearchResult}; #[derive(Debug, StructOpt)] -#[structopt(name = "milli", about = "The server binary of the milli project.")] -struct Opt { +/// The HTTP main server of the milli project. +pub struct Opt { /// The database path where the LMDB database is located. /// It is created if it doesn't already exist. #[structopt(long = "db", parse(from_os_str))] @@ -73,10 +69,7 @@ struct IndexTemplate { docs_count: usize, } -#[tokio::main] -async fn main() -> anyhow::Result<()> { - let opt = Opt::from_args(); - +pub fn run(opt: Opt) -> anyhow::Result<()> { stderrlog::new() .verbosity(opt.verbose) .show_level(false) @@ -231,8 +224,13 @@ async fn main() -> anyhow::Result<()> { .or(dash_logo_black_route) .or(query_route); - let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap(); - warp::serve(routes).run(addr).await; + let addr = SocketAddr::from_str(&opt.http_listen_addr)?; + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()? + .block_on(async { + warp::serve(routes).run(addr).await + }); Ok(()) }