diff --git a/Cargo.lock b/Cargo.lock index 4d80cca36..e8eefa09b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,6 +99,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +[[package]] +name = "byte-unit" +version = "4.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c8758c32833faaae35b24a73d332e62d0528e89076ae841c63940e37008b153" +dependencies = [ + "utf8-width", +] + [[package]] name = "byteorder" version = "1.3.4" @@ -669,6 +678,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bstr", + "byte-unit", "byteorder", "criterion", "crossbeam-channel", @@ -1440,6 +1450,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf8-width" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa" + [[package]] name = "uuid" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index 79c947877..a2e2aa0df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] anyhow = "1.0.28" bstr = "0.2.13" +byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byteorder = "1.3.4" crossbeam-channel = "0.5.0" csv = "1.1.3" diff --git a/http-ui/Cargo.lock b/http-ui/Cargo.lock index 6f54aae6c..3d3581ca6 100644 --- a/http-ui/Cargo.lock +++ b/http-ui/Cargo.lock @@ -196,6 +196,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" +[[package]] +name = "byte-unit" +version = "4.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c8758c32833faaae35b24a73d332e62d0528e89076ae841c63940e37008b153" +dependencies = [ + "utf8-width", +] + [[package]] name = "byteorder" version = "1.3.4" @@ -759,6 +768,7 @@ dependencies = [ "askama", "askama_warp", "async-compression", + "byte-unit", "bytes", "flate2", "futures", @@ -1010,6 +1020,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bstr", + "byte-unit", "byteorder", "crossbeam-channel", "csv", @@ -2212,6 +2223,12 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7" +[[package]] +name = "utf8-width" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa" + [[package]] name = "uuid" version = "0.8.1" diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 4983b2a77..6b236496e 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -8,6 +8,7 @@ edition = "2018" [dependencies] anyhow = "1.0.28" async-compression = { version = "0.3.6", features = ["gzip", "tokio-02"] } +byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" } heed = "0.10.5" memmap = "0.7.0" diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index bb407fd39..80c65716d 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -11,6 +11,7 @@ use std::{mem, io}; use askama_warp::Template; use async_compression::tokio_02::write::GzipEncoder; +use byte_unit::Byte; use flate2::read::GzDecoder; use futures::stream; use futures::{FutureExt, StreamExt}; @@ -44,13 +45,13 @@ pub struct Opt { /// The maximum size the database can take on disk. It is recommended to specify /// the whole disk space (value must be a multiple of a page size). - #[structopt(long = "db-size", default_value = "107374182400")] // 100 GB - database_size: usize, + #[structopt(long = "db-size", default_value = "100 GiB")] + database_size: Byte, /// The maximum size the database that stores the updates can take on disk. It is recommended /// to specify the whole disk space (value must be a multiple of a page size). - #[structopt(long = "udb-size", default_value = "10737418240")] // 10 GB - update_database_size: usize, + #[structopt(long = "udb-size", default_value = "10 GiB")] + update_database_size: Byte, /// Disable document highlighting on the dashboard. #[structopt(long)] @@ -84,8 +85,8 @@ pub struct IndexerOpt { /// /// It is automatically split by the number of jobs e.g. if you use 7 jobs /// and 7 GB of max memory, each thread will use a maximum of 1 GB. - #[structopt(long, default_value = "7516192768")] // 7 GB - pub max_memory: usize, + #[structopt(long, default_value = "7 GiB")] + pub max_memory: Byte, /// Size of the linked hash map cache when indexing. /// The bigger it is, the faster the indexing is but the more memory it takes. @@ -108,8 +109,8 @@ pub struct IndexerOpt { /// /// File fusing must only be enable on file systems that support the `FALLOC_FL_COLLAPSE_RANGE`, /// (i.e. ext4 and XFS). File fusing will only work if the `enable-chunk-fusing` is set. - #[structopt(long, default_value = "4294967296")] // 4 GB - pub chunk_fusing_shrink_size: u64, + #[structopt(long, default_value = "4 GiB")] + pub chunk_fusing_shrink_size: Byte, /// Enable the chunk fusing or not, this reduces the amount of disk used by a factor of 2. #[structopt(long)] @@ -281,7 +282,7 @@ async fn main() -> anyhow::Result<()> { create_dir_all(&opt.database)?; let mut options = EnvOpenOptions::new(); - options.map_size(opt.database_size); + options.map_size(opt.database_size.get_bytes() as usize); // Setup the global thread pool let jobs = opt.indexer.indexing_jobs.unwrap_or(0); @@ -293,7 +294,7 @@ async fn main() -> anyhow::Result<()> { // Setup the LMDB based update database. let mut update_store_options = EnvOpenOptions::new(); - update_store_options.map_size(opt.update_database_size); + update_store_options.map_size(opt.update_database_size.get_bytes() as usize); let update_store_path = opt.database.join("updates.mdb"); create_dir_all(&update_store_path)?; @@ -316,10 +317,10 @@ async fn main() -> anyhow::Result<()> { } update_builder.thread_pool(GLOBAL_THREAD_POOL.get().unwrap()); update_builder.log_every_n(indexer_opt_cloned.log_every_n); - update_builder.max_memory(indexer_opt_cloned.max_memory); + update_builder.max_memory(indexer_opt_cloned.max_memory.get_bytes() as usize); update_builder.linked_hash_map_size(indexer_opt_cloned.linked_hash_map_size); update_builder.chunk_compression_type(indexer_opt_cloned.chunk_compression_type); - update_builder.chunk_fusing_shrink_size(indexer_opt_cloned.chunk_fusing_shrink_size); + update_builder.chunk_fusing_shrink_size(indexer_opt_cloned.chunk_fusing_shrink_size.get_bytes()); // we extract the update type and execute the update itself. let result: anyhow::Result<()> = match meta { diff --git a/src/subcommand/infos.rs b/src/subcommand/infos.rs index ccac80101..81c2a2fed 100644 --- a/src/subcommand/infos.rs +++ b/src/subcommand/infos.rs @@ -2,9 +2,10 @@ use std::path::PathBuf; use std::{str, io, fmt}; use anyhow::Context; +use byte_unit::Byte; +use crate::Index; use heed::EnvOpenOptions; use structopt::StructOpt; -use crate::Index; use Command::*; @@ -39,8 +40,8 @@ pub struct Opt { /// The maximum size the database can take on disk. It is recommended to specify /// the whole disk space (value must be a multiple of a page size). - #[structopt(long = "db-size", default_value = "107374182400")] // 100 GB - database_size: usize, + #[structopt(long = "db-size", default_value = "100 GiB")] + database_size: Byte, /// Verbose mode (-v, -vv, -vvv, etc.) #[structopt(short, long, parse(from_occurrences))] @@ -159,7 +160,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { .init()?; let mut options = EnvOpenOptions::new(); - options.map_size(opt.database_size); + options.map_size(opt.database_size.get_bytes() as usize); // Open the LMDB database. let index = Index::new(options, opt.database)?; diff --git a/src/subcommand/search.rs b/src/subcommand/search.rs index 13b306d47..a4bdae870 100644 --- a/src/subcommand/search.rs +++ b/src/subcommand/search.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; use std::time::Instant; use anyhow::Context; +use byte_unit::Byte; use heed::EnvOpenOptions; use log::debug; use structopt::StructOpt; @@ -21,8 +22,8 @@ pub struct Opt { /// The maximum size the database can take on disk. It is recommended to specify /// the whole disk space (value must be a multiple of a page size). - #[structopt(long = "db-size", default_value = "107374182400")] // 100 GB - database_size: usize, + #[structopt(long = "db-size", default_value = "100 GiB")] + database_size: Byte, /// Verbose mode (-v, -vv, -vvv, etc.) #[structopt(short, long, parse(from_occurrences))] @@ -41,7 +42,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { std::fs::create_dir_all(&opt.database)?; let mut options = EnvOpenOptions::new(); - options.map_size(opt.database_size); + options.map_size(opt.database_size.get_bytes() as usize); // Open the LMDB database. let index = Index::new(options, &opt.database)?;