mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Merge pull request #1588 from meilisearch/test-new-indexer
Integrate the new indexer
This commit is contained in:
commit
33514b28be
95
Cargo.lock
generated
95
Cargo.lock
generated
@ -458,6 +458,20 @@ name = "bytemuck"
|
|||||||
version = "1.7.0"
|
version = "1.7.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9966d2ab714d0f785dbac0a0396251a35280aeb42413281617d0209ab4898435"
|
checksum = "9966d2ab714d0f785dbac0a0396251a35280aeb42413281617d0209ab4898435"
|
||||||
|
dependencies = [
|
||||||
|
"bytemuck_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bytemuck_derive"
|
||||||
|
version = "1.0.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2 1.0.27",
|
||||||
|
"quote 1.0.9",
|
||||||
|
"syn 1.0.73",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "byteorder"
|
name = "byteorder"
|
||||||
@ -630,6 +644,12 @@ dependencies = [
|
|||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core-foundation-sys"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cow-utils"
|
name = "cow-utils"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
@ -1097,13 +1117,14 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "grenad"
|
name = "grenad"
|
||||||
version = "0.1.0"
|
version = "0.3.0"
|
||||||
source = "git+https://github.com/Kerollmops/grenad.git?rev=3adcb26#3adcb267dcbc590c7da10eb5f887a254865b3dbe"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7824d499230110f4e4a8d4fd3fd4dc15c1347fce5082e4bba82eef17f43e1ed8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"flate2",
|
"flate2",
|
||||||
"log",
|
"lz4_flex",
|
||||||
"nix",
|
|
||||||
"snap",
|
"snap",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"zstd",
|
"zstd",
|
||||||
@ -1161,8 +1182,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed"
|
name = "heed"
|
||||||
version = "0.12.0"
|
version = "0.12.1"
|
||||||
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
|
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"heed-traits",
|
"heed-traits",
|
||||||
@ -1180,12 +1201,12 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "heed-traits"
|
name = "heed-traits"
|
||||||
version = "0.7.0"
|
version = "0.7.0"
|
||||||
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
|
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "heed-types"
|
name = "heed-types"
|
||||||
version = "0.7.2"
|
version = "0.7.2"
|
||||||
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#8e5dc6d71c8166a8d7d0db059e6e51478942b551"
|
source = "git+https://github.com/Kerollmops/heed?tag=v0.12.1#fc017cf3394af737f92fd71e16f0499a78b79d65"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bincode",
|
"bincode",
|
||||||
"heed-traits",
|
"heed-traits",
|
||||||
@ -1534,6 +1555,15 @@ dependencies = [
|
|||||||
"syn 0.15.44",
|
"syn 0.15.44",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lz4_flex"
|
||||||
|
version = "0.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5827b976d911b5d2e42b2ccfc7c0d2461a1414e8280436885218762fc529b3f8"
|
||||||
|
dependencies = [
|
||||||
|
"twox-hash",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "main_error"
|
name = "main_error"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
@ -1619,6 +1649,7 @@ dependencies = [
|
|||||||
"siphasher",
|
"siphasher",
|
||||||
"slice-group-by",
|
"slice-group-by",
|
||||||
"structopt",
|
"structopt",
|
||||||
|
"sysinfo",
|
||||||
"tar",
|
"tar",
|
||||||
"tempdir",
|
"tempdir",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
@ -1675,13 +1706,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "0.11.0"
|
version = "0.12.0"
|
||||||
source = "git+https://github.com/meilisearch/milli.git?tag=v0.11.0#c51bb6789cb3fbb6511138374b3443f9116a445c"
|
source = "git+https://github.com/meilisearch/milli.git?tag=v0.12.0#5cbe8793251bbf143434c8a4c4e7195ca6c5f2ac"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bstr",
|
"bstr",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"chrono",
|
"chrono",
|
||||||
"concat-arrays",
|
"concat-arrays",
|
||||||
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
"either",
|
"either",
|
||||||
"flate2",
|
"flate2",
|
||||||
@ -1788,18 +1820,6 @@ dependencies = [
|
|||||||
"syn 1.0.73",
|
"syn 1.0.73",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nix"
|
|
||||||
version = "0.19.1"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2"
|
|
||||||
dependencies = [
|
|
||||||
"bitflags",
|
|
||||||
"cc",
|
|
||||||
"cfg-if 1.0.0",
|
|
||||||
"libc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "normalize-line-endings"
|
name = "normalize-line-endings"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
@ -2690,6 +2710,12 @@ dependencies = [
|
|||||||
"version_check",
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "static_assertions"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stdweb"
|
name = "stdweb"
|
||||||
version = "0.4.20"
|
version = "0.4.20"
|
||||||
@ -2812,6 +2838,21 @@ dependencies = [
|
|||||||
"unicode-xid 0.2.2",
|
"unicode-xid 0.2.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sysinfo"
|
||||||
|
version = "0.20.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0af066e6272f2175c1783cfc2ebf3e2d8dfe2c182b00677fdeccbf8291af83fb"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
"ntapi",
|
||||||
|
"once_cell",
|
||||||
|
"rayon",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tar"
|
name = "tar"
|
||||||
version = "0.4.35"
|
version = "0.4.35"
|
||||||
@ -3052,6 +3093,16 @@ version = "0.2.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
|
checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "twox-hash"
|
||||||
|
version = "1.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1f559b464de2e2bdabcac6a210d12e9b5a5973c251e102c44c585c71d51bd78e"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"static_assertions",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "typenum"
|
name = "typenum"
|
||||||
version = "1.13.0"
|
version = "1.13.0"
|
||||||
|
@ -50,7 +50,7 @@ main_error = "0.1.0"
|
|||||||
meilisearch-error = { path = "../meilisearch-error" }
|
meilisearch-error = { path = "../meilisearch-error" }
|
||||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" }
|
||||||
memmap = "0.7.0"
|
memmap = "0.7.0"
|
||||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.11.0" }
|
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.12.0" }
|
||||||
mime = "0.3.16"
|
mime = "0.3.16"
|
||||||
num_cpus = "1.13.0"
|
num_cpus = "1.13.0"
|
||||||
once_cell = "1.5.2"
|
once_cell = "1.5.2"
|
||||||
@ -76,6 +76,7 @@ pin-project = "1.0.7"
|
|||||||
whoami = { version = "1.1.2", optional = true }
|
whoami = { version = "1.1.2", optional = true }
|
||||||
reqwest = { version = "0.11.3", features = ["json", "rustls-tls"], default-features = false, optional = true }
|
reqwest = { version = "0.11.3", features = ["json", "rustls-tls"], default-features = false, optional = true }
|
||||||
serdeval = "0.1.0"
|
serdeval = "0.1.0"
|
||||||
|
sysinfo = "0.20.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
actix-rt = "2.1.0"
|
actix-rt = "2.1.0"
|
||||||
|
@ -14,10 +14,8 @@ pub struct UpdateHandler {
|
|||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
thread_pool: ThreadPool,
|
thread_pool: ThreadPool,
|
||||||
log_frequency: usize,
|
log_frequency: usize,
|
||||||
max_memory: usize,
|
max_memory: Option<usize>,
|
||||||
linked_hash_map_size: usize,
|
|
||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
chunk_fusing_shrink_size: u64,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UpdateHandler {
|
impl UpdateHandler {
|
||||||
@ -25,15 +23,14 @@ impl UpdateHandler {
|
|||||||
let thread_pool = rayon::ThreadPoolBuilder::new()
|
let thread_pool = rayon::ThreadPoolBuilder::new()
|
||||||
.num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
|
.num_threads(opt.indexing_jobs.unwrap_or(num_cpus::get() / 2))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
max_nb_chunks: opt.max_nb_chunks,
|
max_nb_chunks: opt.max_nb_chunks,
|
||||||
chunk_compression_level: opt.chunk_compression_level,
|
chunk_compression_level: opt.chunk_compression_level,
|
||||||
thread_pool,
|
thread_pool,
|
||||||
log_frequency: opt.log_every_n,
|
log_frequency: opt.log_every_n,
|
||||||
max_memory: opt.max_memory.get_bytes() as usize,
|
max_memory: opt.max_memory.map(|m| m.get_bytes() as usize),
|
||||||
linked_hash_map_size: opt.linked_hash_map_size,
|
|
||||||
chunk_compression_type: opt.chunk_compression_type,
|
chunk_compression_type: opt.chunk_compression_type,
|
||||||
chunk_fusing_shrink_size: opt.chunk_fusing_shrink_size.get_bytes(),
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -48,10 +45,10 @@ impl UpdateHandler {
|
|||||||
}
|
}
|
||||||
update_builder.thread_pool(&self.thread_pool);
|
update_builder.thread_pool(&self.thread_pool);
|
||||||
update_builder.log_every_n(self.log_frequency);
|
update_builder.log_every_n(self.log_frequency);
|
||||||
update_builder.max_memory(self.max_memory);
|
if let Some(max_memory) = self.max_memory {
|
||||||
update_builder.linked_hash_map_size(self.linked_hash_map_size);
|
update_builder.max_memory(max_memory);
|
||||||
|
}
|
||||||
update_builder.chunk_compression_type(self.chunk_compression_type);
|
update_builder.chunk_compression_type(self.chunk_compression_type);
|
||||||
update_builder.chunk_fusing_shrink_size(self.chunk_fusing_shrink_size);
|
|
||||||
update_builder
|
update_builder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
|
use byte_unit::ByteError;
|
||||||
|
use std::fmt;
|
||||||
use std::io::{BufReader, Read};
|
use std::io::{BufReader, Read};
|
||||||
|
use std::ops::Deref;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::str::FromStr;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::{error, fs};
|
use std::{error, fs};
|
||||||
|
|
||||||
@ -11,6 +15,7 @@ use rustls::{
|
|||||||
RootCertStore,
|
RootCertStore,
|
||||||
};
|
};
|
||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
|
use sysinfo::{RefreshKind, System, SystemExt};
|
||||||
|
|
||||||
#[derive(Debug, Clone, StructOpt)]
|
#[derive(Debug, Clone, StructOpt)]
|
||||||
pub struct IndexerOpts {
|
pub struct IndexerOpts {
|
||||||
@ -23,13 +28,15 @@ pub struct IndexerOpts {
|
|||||||
#[structopt(long)]
|
#[structopt(long)]
|
||||||
pub max_nb_chunks: Option<usize>,
|
pub max_nb_chunks: Option<usize>,
|
||||||
|
|
||||||
/// The maximum amount of memory to use for the Grenad buffer. It is recommended
|
/// The maximum amount of memory the indexer will use. It defaults to 2/3
|
||||||
/// to use something like 80%-90% of the available memory.
|
/// of the available memory. It is recommended to use something like 80%-90%
|
||||||
|
/// of the available memory, no more.
|
||||||
///
|
///
|
||||||
/// It is automatically split by the number of jobs e.g. if you use 7 jobs
|
/// In case the engine is unable to retrieve the available memory the engine will
|
||||||
/// and 7 GB of max memory, each thread will use a maximum of 1 GB.
|
/// try to use the memory it needs but without real limit, this can lead to
|
||||||
#[structopt(long, default_value = "7 GiB")]
|
/// Out-Of-Memory issues and it is recommended to specify the amount of memory to use.
|
||||||
pub max_memory: Byte,
|
#[structopt(long, default_value)]
|
||||||
|
pub max_memory: MaxMemory,
|
||||||
|
|
||||||
/// Size of the linked hash map cache when indexing.
|
/// Size of the linked hash map cache when indexing.
|
||||||
/// The bigger it is, the faster the indexing is but the more memory it takes.
|
/// The bigger it is, the faster the indexing is but the more memory it takes.
|
||||||
@ -69,7 +76,7 @@ impl Default for IndexerOpts {
|
|||||||
Self {
|
Self {
|
||||||
log_every_n: 100_000,
|
log_every_n: 100_000,
|
||||||
max_nb_chunks: None,
|
max_nb_chunks: None,
|
||||||
max_memory: Byte::from_str("1GiB").unwrap(),
|
max_memory: MaxMemory::default(),
|
||||||
linked_hash_map_size: 500,
|
linked_hash_map_size: 500,
|
||||||
chunk_compression_type: CompressionType::None,
|
chunk_compression_type: CompressionType::None,
|
||||||
chunk_compression_level: None,
|
chunk_compression_level: None,
|
||||||
@ -240,6 +247,57 @@ impl Opt {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A type used to detect the max memory available and use 2/3 of it.
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct MaxMemory(Option<Byte>);
|
||||||
|
|
||||||
|
impl FromStr for MaxMemory {
|
||||||
|
type Err = ByteError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<MaxMemory, ByteError> {
|
||||||
|
Byte::from_str(s).map(Some).map(MaxMemory)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for MaxMemory {
|
||||||
|
fn default() -> MaxMemory {
|
||||||
|
MaxMemory(
|
||||||
|
total_memory_bytes()
|
||||||
|
.map(|bytes| bytes * 2 / 3)
|
||||||
|
.map(Byte::from_bytes),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for MaxMemory {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match self.0 {
|
||||||
|
Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)),
|
||||||
|
None => f.write_str("unknown"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for MaxMemory {
|
||||||
|
type Target = Option<Byte>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the total amount of bytes available or `None` if this system isn't supported.
|
||||||
|
fn total_memory_bytes() -> Option<u64> {
|
||||||
|
if System::IS_SUPPORTED {
|
||||||
|
let memory_kind = RefreshKind::new().with_memory();
|
||||||
|
let mut system = System::new_with_specifics(memory_kind);
|
||||||
|
system.refresh_memory();
|
||||||
|
Some(system.total_memory() * 1024) // KiB into bytes
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
|
fn load_certs(filename: PathBuf) -> Result<Vec<rustls::Certificate>, Box<dyn error::Error>> {
|
||||||
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
|
let certfile = fs::File::open(filename).map_err(|_| "cannot open certificate file")?;
|
||||||
let mut reader = BufReader::new(certfile);
|
let mut reader = BufReader::new(certfile);
|
||||||
|
Loading…
Reference in New Issue
Block a user