diff --git a/Cargo.lock b/Cargo.lock index 5e5acf63f..a6a178510 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -532,6 +532,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + [[package]] name = "fst" version = "0.4.5" @@ -556,9 +562,9 @@ checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" [[package]] name = "funty" -version = "1.2.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1847abb9cb65d566acd5942e94aea9c8f547ad02c98e1649326fc0e8910b8b1e" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" [[package]] name = "futures" @@ -881,6 +887,7 @@ dependencies = [ "either", "flate2", "fst", + "funty", "futures", "grenad", "heed", @@ -968,6 +975,22 @@ dependencies = [ "hashbrown 0.9.1", ] +[[package]] +name = "infos" +version = "0.1.0" +dependencies = [ + "anyhow", + "byte-unit", + "csv", + "heed", + "jemallocator", + "milli", + "roaring", + "serde_json", + "stderrlog", + "structopt", +] + [[package]] name = "input_buffer" version = "0.3.1" @@ -1010,6 +1033,27 @@ version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" +[[package]] +name = "jemalloc-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" +dependencies = [ + "cc", + "fs_extra", + "libc", +] + +[[package]] +name = "jemallocator" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" +dependencies = [ + "jemalloc-sys", + "libc", +] + [[package]] name = "jieba-rs" version = "0.6.2" @@ -1172,7 +1216,6 @@ version = "0.1.0" dependencies = [ "anyhow", "bstr", - "byte-unit", "byteorder", "criterion", "crossbeam-channel", @@ -1204,8 +1247,6 @@ dependencies = [ "serde_json", "smallstr", "smallvec", - "stderrlog", - "structopt", "tempfile", "uuid", ] @@ -1902,6 +1943,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "search" +version = "0.1.0" +dependencies = [ + "anyhow", + "byte-unit", + "heed", + "jemallocator", + "log", + "milli", + "serde_json", + "stderrlog", + "structopt", +] + [[package]] name = "semver" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index d04cae871..16a5ab8d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["milli", "http-ui"] +members = ["milli", "http-ui", "infos", "search"] default-members = ["milli"] [profile.release] diff --git a/http-ui/Cargo.toml b/http-ui/Cargo.toml index 26adbfd56..1326abfc5 100644 --- a/http-ui/Cargo.toml +++ b/http-ui/Cargo.toml @@ -34,3 +34,6 @@ warp = "0.2.2" log = "0.4.11" stderrlog = "0.5.0" fst = "0.4.5" + +# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105) +funty = "=1.1.0" diff --git a/infos/Cargo.toml b/infos/Cargo.toml new file mode 100644 index 000000000..14d52a573 --- /dev/null +++ b/infos/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "infos" +version = "0.1.0" +authors = ["Clément Renault "] +edition = "2018" + +[dependencies] +anyhow = "1.0.28" +byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } +csv = "1.1.3" +heed = "0.10.5" +jemallocator = "0.3.2" +milli = { path = "../milli" } +roaring = "0.6.4" +serde_json = "1.0.59" +stderrlog = "0.5.0" +structopt = { version = "0.3.14", default-features = false } diff --git a/milli/src/subcommand/infos.rs b/infos/src/main.rs similarity index 97% rename from milli/src/subcommand/infos.rs rename to infos/src/main.rs index 51d0492ce..e874385e6 100644 --- a/milli/src/subcommand/infos.rs +++ b/infos/src/main.rs @@ -4,12 +4,16 @@ use std::{str, io, fmt}; use anyhow::Context; use byte_unit::Byte; -use crate::Index; use heed::EnvOpenOptions; +use milli::Index; use structopt::StructOpt; use Command::*; +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; + const MAIN_DB_NAME: &str = "main"; const WORD_DOCIDS_DB_NAME: &str = "word-docids"; const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions"; @@ -153,7 +157,18 @@ enum Command { PatchToNewExternalIds, } -pub fn run(opt: Opt) -> anyhow::Result<()> { +fn main() -> Result<(), ()> { + let opt = Opt::from_args(); + match run(opt) { + Ok(()) => Ok(()), + Err(e) => { + eprintln!("{}", e); + Err(()) + }, + } +} + +fn run(opt: Opt) -> anyhow::Result<()> { stderrlog::new() .verbosity(opt.verbose) .show_level(false) @@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R let documents_ids = documents_ids.to_owned(); index.main.put::<_, ByteSlice, ByteSlice>( wtxn, - crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(), + milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(), &documents_ids, )?; index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?; @@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>( rtxn: &'txn heed::RoTxn, db: heed::Database, field_id: u8, - facet_type: crate::facet::FacetType, + facet_type: milli::facet::FacetType, string_fn: impl Fn(&str) -> T + 'txn, float_fn: impl Fn(u8, f64, f64) -> T + 'txn, integer_fn: impl Fn(u8, i64, i64) -> T + 'txn, @@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>( where DC: heed::BytesDecode<'txn>, { - use crate::facet::FacetType; - use crate::heed_codec::facet::{ + use milli::facet::FacetType; + use milli::heed_codec::facet::{ FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec, }; @@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { use std::io::{BufWriter, Write as _}; - use crate::obkv_to_json; + use milli::obkv_to_json; let stdout = io::stdout(); let mut out = BufWriter::new(stdout); @@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow: fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { use heed::types::DecodeIgnore; - use crate::{DocumentId, BEU32StrCodec}; + use milli::{DocumentId, BEU32StrCodec}; let mut words_counts = Vec::new(); let mut count = 0; @@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow:: fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> { use heed::types::DecodeIgnore; - use crate::BoRoaringBitmapCodec; + use milli::BoRoaringBitmapCodec; let mut values_length = Vec::new(); let mut count = 0; @@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu use heed::types::ByteSlice; use heed::{Error, BytesDecode}; use roaring::RoaringBitmap; - use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; + use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec}; fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>( db: heed::PolyDatabase, @@ -720,7 +735,7 @@ fn word_pair_proximities_docids( ) -> anyhow::Result<()> { use heed::types::ByteSlice; - use crate::RoaringBitmapCodec; + use milli::RoaringBitmapCodec; let stdout = io::stdout(); let mut wtr = csv::Writer::from_writer(stdout.lock()); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index f92980589..7b6d3b7b9 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -7,7 +7,6 @@ edition = "2018" [dependencies] anyhow = "1.0.28" bstr = "0.2.13" -byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } byteorder = "1.3.4" crossbeam-channel = "0.5.0" csv = "1.1.3" @@ -33,7 +32,6 @@ serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1.0.59", features = ["preserve_order"] } smallstr = { version = "0.2.0", features = ["serde"] } smallvec = "1.4.0" -structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] } tempfile = "3.1.0" uuid = { version = "0.8.1", features = ["v4"] } @@ -46,7 +44,6 @@ itertools = "0.9.0" # logging log = "0.4.11" -stderrlog = "0.5.0" [dev-dependencies] criterion = "0.3.3" diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 09a66ea65..7a9afde2d 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -3,15 +3,14 @@ mod criterion; mod external_documents_ids; mod fields_ids_map; -mod index; mod mdfs; mod query_tokens; mod search; mod update_store; pub mod facet; pub mod heed_codec; +pub mod index; pub mod proximity; -pub mod subcommand; pub mod update; use std::borrow::Cow; diff --git a/milli/src/main.rs b/milli/src/main.rs deleted file mode 100644 index acc8733b3..000000000 --- a/milli/src/main.rs +++ /dev/null @@ -1,22 +0,0 @@ -use structopt::StructOpt; - -use milli::subcommand::infos::{self, Opt as InfosOpt}; -use milli::subcommand::search::{self, Opt as SearchOpt}; - -#[cfg(target_os = "linux")] -#[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; - -#[derive(Debug, StructOpt)] -#[structopt(name = "milli", about = "The milli project.")] -enum Command { - Infos(InfosOpt), - Search(SearchOpt), -} - -fn main() -> anyhow::Result<()> { - match Command::from_args() { - Command::Infos(opt) => infos::run(opt), - Command::Search(opt) => search::run(opt), - } -} diff --git a/milli/src/subcommand/mod.rs b/milli/src/subcommand/mod.rs deleted file mode 100644 index 8e2223a5e..000000000 --- a/milli/src/subcommand/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub mod infos; -pub mod search; diff --git a/search/Cargo.toml b/search/Cargo.toml new file mode 100644 index 000000000..947deb70d --- /dev/null +++ b/search/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "search" +version = "0.1.0" +authors = ["Clément Renault "] +edition = "2018" + +[dependencies] +anyhow = "1.0.28" +byte-unit = { version = "4.0.9", default-features = false, features = ["std"] } +heed = "0.10.5" +jemallocator = "0.3.2" +log = "0.4.11" +milli = { path = "../milli" } +serde_json = "1.0.59" +stderrlog = "0.5.0" +structopt = { version = "0.3.14", default-features = false } diff --git a/milli/src/subcommand/search.rs b/search/src/main.rs similarity index 87% rename from milli/src/subcommand/search.rs rename to search/src/main.rs index 0a150209e..d2e727417 100644 --- a/milli/src/subcommand/search.rs +++ b/search/src/main.rs @@ -8,7 +8,11 @@ use heed::EnvOpenOptions; use log::debug; use structopt::StructOpt; -use crate::{Index, obkv_to_json}; +use milli::{Index, obkv_to_json}; + +#[cfg(target_os = "linux")] +#[global_allocator] +static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; #[derive(Debug, StructOpt)] /// A simple search helper binary for the milli project. @@ -35,7 +39,18 @@ pub struct Opt { print_facet_distribution: bool, } -pub fn run(opt: Opt) -> anyhow::Result<()> { +fn main() -> Result<(), ()> { + let opt = Opt::from_args(); + match run(opt) { + Ok(()) => Ok(()), + Err(e) => { + eprintln!("{}", e); + Err(()) + }, + } +} + +fn run(opt: Opt) -> anyhow::Result<()> { stderrlog::new() .verbosity(opt.verbose) .show_level(false)