mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-01-18 08:48:32 +08:00
Merge pull request #81 from meilisearch/smart-workspace
Change the project to become a workspace
This commit is contained in:
commit
5d0ac3e3e6
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,9 +2,6 @@
|
||||
/target
|
||||
/Cargo.lock
|
||||
|
||||
# the sub target folder
|
||||
http-ui/target
|
||||
|
||||
# datasets
|
||||
*.csv
|
||||
*.mmdb
|
||||
|
1210
Cargo.lock
generated
1210
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
70
Cargo.toml
70
Cargo.toml
@ -1,70 +1,6 @@
|
||||
[package]
|
||||
name = "milli"
|
||||
version = "0.1.0"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.28"
|
||||
bstr = "0.2.13"
|
||||
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||
byteorder = "1.3.4"
|
||||
crossbeam-channel = "0.5.0"
|
||||
csv = "1.1.3"
|
||||
either = "1.6.1"
|
||||
flate2 = "1.0.17"
|
||||
fst = "0.4.5"
|
||||
fxhash = "0.2.1"
|
||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
||||
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
human_format = "1.0.3"
|
||||
jemallocator = "0.3.2"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
linked-hash-map = "0.5.3"
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||
memmap = "0.7.0"
|
||||
near-proximity = { git = "https://github.com/Kerollmops/plane-sweep-proximity", rev = "6608205" }
|
||||
num-traits = "0.2.14"
|
||||
obkv = "0.1.0"
|
||||
once_cell = "1.4.0"
|
||||
ordered-float = "2.0.0"
|
||||
rayon = "1.3.1"
|
||||
regex = "1.4.2"
|
||||
ringtail = "0.3.0"
|
||||
roaring = "0.6.4"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||
slice-group-by = "0.2.6"
|
||||
smallstr = { version = "0.2.0", features = ["serde"] }
|
||||
smallvec = "1.4.0"
|
||||
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
|
||||
tempfile = "3.1.0"
|
||||
uuid = { version = "0.8.1", features = ["v4"] }
|
||||
|
||||
# facet filter parser
|
||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||
pest_derive = "2.1.0"
|
||||
|
||||
# documents words self-join
|
||||
itertools = "0.9.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.11"
|
||||
stderrlog = "0.5.0"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3.3"
|
||||
maplit = "1.0.2"
|
||||
|
||||
[build-dependencies]
|
||||
fst = "0.4.5"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
[[bench]]
|
||||
name = "search"
|
||||
harness = false
|
||||
[workspace]
|
||||
members = ["milli", "http-ui", "infos", "search"]
|
||||
default-members = ["milli"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
2530
http-ui/Cargo.lock
generated
2530
http-ui/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -12,7 +12,7 @@ grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
||||
heed = "0.10.5"
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||
memmap = "0.7.0"
|
||||
milli = { path = ".." }
|
||||
milli = { path = "../milli" }
|
||||
once_cell = "1.4.1"
|
||||
rayon = "1.5.0"
|
||||
structopt = { version = "0.3.14", default-features = false, features = ["wrap_help"] }
|
||||
@ -34,3 +34,6 @@ warp = "0.2.2"
|
||||
log = "0.4.11"
|
||||
stderrlog = "0.5.0"
|
||||
fst = "0.4.5"
|
||||
|
||||
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
|
||||
funty = "=1.1.0"
|
||||
|
17
infos/Cargo.toml
Normal file
17
infos/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "infos"
|
||||
version = "0.1.0"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.28"
|
||||
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||
csv = "1.1.3"
|
||||
heed = "0.10.5"
|
||||
jemallocator = "0.3.2"
|
||||
milli = { path = "../milli" }
|
||||
roaring = "0.6.4"
|
||||
serde_json = "1.0.59"
|
||||
stderrlog = "0.5.0"
|
||||
structopt = { version = "0.3.14", default-features = false }
|
@ -4,12 +4,16 @@ use std::{str, io, fmt};
|
||||
|
||||
use anyhow::Context;
|
||||
use byte_unit::Byte;
|
||||
use crate::Index;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::Index;
|
||||
use structopt::StructOpt;
|
||||
|
||||
use Command::*;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
const MAIN_DB_NAME: &str = "main";
|
||||
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
||||
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
||||
@ -153,7 +157,18 @@ enum Command {
|
||||
PatchToNewExternalIds,
|
||||
}
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
fn main() -> Result<(), ()> {
|
||||
let opt = Opt::from_args();
|
||||
match run(opt) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
eprintln!("{}", e);
|
||||
Err(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
||||
@ -204,7 +219,7 @@ fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::R
|
||||
let documents_ids = documents_ids.to_owned();
|
||||
index.main.put::<_, ByteSlice, ByteSlice>(
|
||||
wtxn,
|
||||
crate::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
|
||||
milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
|
||||
&documents_ids,
|
||||
)?;
|
||||
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
|
||||
@ -242,7 +257,7 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
|
||||
rtxn: &'txn heed::RoTxn,
|
||||
db: heed::Database<heed::types::ByteSlice, DC>,
|
||||
field_id: u8,
|
||||
facet_type: crate::facet::FacetType,
|
||||
facet_type: milli::facet::FacetType,
|
||||
string_fn: impl Fn(&str) -> T + 'txn,
|
||||
float_fn: impl Fn(u8, f64, f64) -> T + 'txn,
|
||||
integer_fn: impl Fn(u8, i64, i64) -> T + 'txn,
|
||||
@ -250,8 +265,8 @@ fn facet_values_iter<'txn, DC: 'txn, T>(
|
||||
where
|
||||
DC: heed::BytesDecode<'txn>,
|
||||
{
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
use milli::facet::FacetType;
|
||||
use milli::heed_codec::facet::{
|
||||
FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec,
|
||||
};
|
||||
|
||||
@ -504,7 +519,7 @@ fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
|
||||
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
use std::io::{BufWriter, Write as _};
|
||||
use crate::obkv_to_json;
|
||||
use milli::obkv_to_json;
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut out = BufWriter::new(stdout);
|
||||
@ -548,7 +563,7 @@ fn total_docid_word_positions_size(index: &Index, rtxn: &heed::RoTxn) -> anyhow:
|
||||
|
||||
fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
use heed::types::DecodeIgnore;
|
||||
use crate::{DocumentId, BEU32StrCodec};
|
||||
use milli::{DocumentId, BEU32StrCodec};
|
||||
|
||||
let mut words_counts = Vec::new();
|
||||
let mut count = 0;
|
||||
@ -587,7 +602,7 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
|
||||
|
||||
fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
use heed::types::DecodeIgnore;
|
||||
use crate::BoRoaringBitmapCodec;
|
||||
use milli::BoRoaringBitmapCodec;
|
||||
|
||||
let mut values_length = Vec::new();
|
||||
let mut count = 0;
|
||||
@ -639,7 +654,7 @@ fn database_stats(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Resu
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{Error, BytesDecode};
|
||||
use roaring::RoaringBitmap;
|
||||
use crate::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
use milli::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
|
||||
fn compute_stats<'a, DC: BytesDecode<'a, DItem = RoaringBitmap>>(
|
||||
db: heed::PolyDatabase,
|
||||
@ -720,7 +735,7 @@ fn word_pair_proximities_docids(
|
||||
) -> anyhow::Result<()>
|
||||
{
|
||||
use heed::types::ByteSlice;
|
||||
use crate::RoaringBitmapCodec;
|
||||
use milli::RoaringBitmapCodec;
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
60
milli/Cargo.toml
Normal file
60
milli/Cargo.toml
Normal file
@ -0,0 +1,60 @@
|
||||
[package]
|
||||
name = "milli"
|
||||
version = "0.1.0"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.28"
|
||||
bstr = "0.2.13"
|
||||
byteorder = "1.3.4"
|
||||
crossbeam-channel = "0.5.0"
|
||||
csv = "1.1.3"
|
||||
either = "1.6.1"
|
||||
flate2 = "1.0.17"
|
||||
fst = "0.4.5"
|
||||
fxhash = "0.2.1"
|
||||
grenad = { git = "https://github.com/Kerollmops/grenad.git", rev = "3adcb26" }
|
||||
heed = { version = "0.10.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
human_format = "1.0.3"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
linked-hash-map = "0.5.3"
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", branch = "main" }
|
||||
memmap = "0.7.0"
|
||||
num-traits = "0.2.14"
|
||||
obkv = "0.1.0"
|
||||
once_cell = "1.4.0"
|
||||
ordered-float = "2.0.0"
|
||||
rayon = "1.3.1"
|
||||
regex = "1.4.2"
|
||||
roaring = "0.6.4"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||
smallstr = { version = "0.2.0", features = ["serde"] }
|
||||
smallvec = "1.4.0"
|
||||
tempfile = "3.1.0"
|
||||
uuid = { version = "0.8.1", features = ["v4"] }
|
||||
|
||||
# facet filter parser
|
||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||
pest_derive = "2.1.0"
|
||||
|
||||
# documents words self-join
|
||||
itertools = "0.9.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.11"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3.3"
|
||||
maplit = "1.0.2"
|
||||
|
||||
[build-dependencies]
|
||||
fst = "0.4.5"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
[[bench]]
|
||||
name = "search"
|
||||
harness = false
|
@ -3,15 +3,14 @@
|
||||
mod criterion;
|
||||
mod external_documents_ids;
|
||||
mod fields_ids_map;
|
||||
mod index;
|
||||
mod mdfs;
|
||||
mod query_tokens;
|
||||
mod search;
|
||||
mod update_store;
|
||||
pub mod facet;
|
||||
pub mod heed_codec;
|
||||
pub mod index;
|
||||
pub mod proximity;
|
||||
pub mod subcommand;
|
||||
pub mod update;
|
||||
|
||||
use std::borrow::Cow;
|
16
search/Cargo.toml
Normal file
16
search/Cargo.toml
Normal file
@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "search"
|
||||
version = "0.1.0"
|
||||
authors = ["Clément Renault <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.28"
|
||||
byte-unit = { version = "4.0.9", default-features = false, features = ["std"] }
|
||||
heed = "0.10.5"
|
||||
jemallocator = "0.3.2"
|
||||
log = "0.4.11"
|
||||
milli = { path = "../milli" }
|
||||
serde_json = "1.0.59"
|
||||
stderrlog = "0.5.0"
|
||||
structopt = { version = "0.3.14", default-features = false }
|
@ -8,7 +8,11 @@ use heed::EnvOpenOptions;
|
||||
use log::debug;
|
||||
use structopt::StructOpt;
|
||||
|
||||
use crate::{Index, obkv_to_json};
|
||||
use milli::{Index, obkv_to_json};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
/// A simple search helper binary for the milli project.
|
||||
@ -35,7 +39,18 @@ pub struct Opt {
|
||||
print_facet_distribution: bool,
|
||||
}
|
||||
|
||||
pub fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
fn main() -> Result<(), ()> {
|
||||
let opt = Opt::from_args();
|
||||
match run(opt) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
eprintln!("{}", e);
|
||||
Err(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
22
src/main.rs
22
src/main.rs
@ -1,22 +0,0 @@
|
||||
use structopt::StructOpt;
|
||||
|
||||
use milli::subcommand::infos::{self, Opt as InfosOpt};
|
||||
use milli::subcommand::search::{self, Opt as SearchOpt};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli", about = "The milli project.")]
|
||||
enum Command {
|
||||
Infos(InfosOpt),
|
||||
Search(SearchOpt),
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
match Command::from_args() {
|
||||
Command::Infos(opt) => infos::run(opt),
|
||||
Command::Search(opt) => search::run(opt),
|
||||
}
|
||||
}
|
@ -1,2 +0,0 @@
|
||||
pub mod infos;
|
||||
pub mod search;
|
Loading…
Reference in New Issue
Block a user