Mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-26 12:05:05 +08:00)
Change binary option and add env var support
commit 58d2aad309 (parent e3426d5b7a)
@@ -29,9 +29,9 @@ pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);
 pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
     let mut meilisearch = MeiliSearch::builder();
 
-    // enable autobatching?
-    AUTOBATCHING_ENABLED.store(
-        opt.scheduler_options.enable_auto_batching,
+    // disable autobatching?
+    let _ = AUTOBATCHING_ENABLED.store(
+        opt.scheduler_options.disable_auto_batching,
         std::sync::atomic::Ordering::Relaxed,
     );
 
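For reference, AUTOBATCHING_ENABLED is the static AtomicBool named in the hunk header above, so the value stored at startup is read back elsewhere with an atomic load. A minimal, self-contained sketch of that store/load pattern (illustrative only, not code from this commit):

use std::sync::atomic::{AtomicBool, Ordering};

// Same shape as the AUTOBATCHING_ENABLED static referenced in the hunk header.
static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false);

fn main() {
    // Written once at startup from the parsed options; `store` returns (),
    // so there is no value that needs capturing.
    AUTOBATCHING_ENABLED.store(true, Ordering::Relaxed);

    // Read wherever batching decisions are made (hypothetical call site).
    let enabled = AUTOBATCHING_ENABLED.load(Ordering::Relaxed);
    println!("autobatching enabled: {enabled}");
}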
@@ -41,27 +41,10 @@ pub struct IndexerOpts {
 
 #[derive(Debug, Clone, Parser, Default, Serialize)]
 pub struct SchedulerConfig {
-    /// enable the autobatching experimental feature
-    #[clap(long, hide = true)]
-    pub enable_auto_batching: bool,
-
-    // The maximum number of updates of the same type that can be batched together.
-    // If unspecified, this is unlimited. A value of 0 is interpreted as 1.
-    #[clap(long, requires = "enable-auto-batching", hide = true)]
-    pub max_batch_size: Option<usize>,
-
-    // The maximum number of documents in a document batch. Since batches must contain at least one
-    // update for the scheduler to make progress, the number of documents in a batch will be at
-    // least the number of documents of its first update.
-    #[clap(long, requires = "enable-auto-batching", hide = true)]
-    pub max_documents_per_batch: Option<usize>,
-
-    /// Debounce duration in seconds
-    ///
-    /// When a new task is enqueued, the scheduler waits for `debounce_duration_sec` seconds for new updates before
-    /// starting to process a batch of updates.
-    #[clap(long, requires = "enable-auto-batching", hide = true)]
-    pub debounce_duration_sec: Option<u64>,
+    /// The engine will disable task auto-batching,
+    /// and will sequencialy compute each task one by one.
+    #[clap(long, env = "DISABLE_AUTO_BATCHING")]
+    pub disable_auto_batching: bool,
 }
 
 impl TryFrom<&IndexerOpts> for IndexerConfig {
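The env = "DISABLE_AUTO_BATCHING" attribute is what provides the environment-variable support from the commit title: with clap's derive API (and its `env` cargo feature enabled), the flag can be set either with --disable-auto-batching on the command line or by exporting DISABLE_AUTO_BATCHING. A standalone sketch with a hypothetical Opts struct, not the real SchedulerConfig:

use clap::Parser;

/// Hypothetical, trimmed-down options struct showing the flag/env pairing.
#[derive(Debug, Parser)]
struct Opts {
    /// Disable task auto-batching and compute each task one by one.
    #[clap(long, env = "DISABLE_AUTO_BATCHING")]
    disable_auto_batching: bool,
}

fn main() {
    // Either `--disable-auto-batching` or e.g. `DISABLE_AUTO_BATCHING=true`
    // in the environment should turn this on.
    let opts = Opts::parse();
    println!("auto-batching disabled: {}", opts.disable_auto_batching);
}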
@@ -3,7 +3,6 @@ use std::collections::{hash_map::Entry, BinaryHeap, HashMap, VecDeque};
 use std::ops::{Deref, DerefMut};
 use std::slice;
 use std::sync::Arc;
-use std::time::Duration;
 
 use atomic_refcell::AtomicRefCell;
 use milli::update::IndexDocumentsMethod;
@@ -248,17 +247,10 @@ impl Scheduler {
     pub fn new(
         store: TaskStore,
         performers: Vec<Arc<dyn BatchHandler + Sync + Send + 'static>>,
-        mut config: SchedulerConfig,
+        config: SchedulerConfig,
     ) -> Result<Arc<RwLock<Self>>> {
         let (notifier, rcv) = watch::channel(());
-
-        let debounce_time = config.debounce_duration_sec;
-
-        // Disable autobatching
-        if !config.enable_auto_batching {
-            config.max_batch_size = Some(1);
-        }
 
         let this = Self {
             snapshots: VecDeque::new(),
             tasks: TaskQueue::default(),
@@ -275,12 +267,7 @@ impl Scheduler {
 
         let this = Arc::new(RwLock::new(this));
 
-        let update_loop = UpdateLoop::new(
-            this.clone(),
-            performers,
-            debounce_time.filter(|&v| v > 0).map(Duration::from_secs),
-            rcv,
-        );
+        let update_loop = UpdateLoop::new(this.clone(), performers, rcv);
 
         tokio::task::spawn_local(update_loop.run());
 
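With the debounce plumbing gone, the only wake-up mechanism left in this constructor is the tokio::sync::watch channel: notifier stays with the scheduler to signal new tasks and rcv is handed to the update loop. A small sketch of that notify/receive pattern, using plain tokio types rather than Meilisearch's own:

use tokio::sync::watch;

#[tokio::main]
async fn main() {
    // A unit-valued watch channel works as a simple "wake up" signal.
    let (notifier, mut rcv) = watch::channel(());

    let worker = tokio::spawn(async move {
        // Each successful `changed()` means at least one notification arrived
        // since the receiver last looked.
        while rcv.changed().await.is_ok() {
            println!("woken up, processing pending work");
        }
        println!("sender dropped, stopping");
    });

    // Something enqueued a task: poke the worker, then shut down.
    notifier.send(()).unwrap();
    drop(notifier);
    worker.await.unwrap();
}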
@@ -497,27 +484,17 @@ fn make_batch(tasks: &mut TaskQueue, config: &SchedulerConfig) -> Processing {
                 match list.peek() {
                     Some(pending) if pending.kind == kind => {
                         // We always need to process at least one task for the scheduler to make progress.
-                        if task_list.len() >= config.max_batch_size.unwrap_or(usize::MAX).max(1)
-                        {
+                        if config.disable_auto_batching && task_list.len() > 0 {
                             break;
                         }
                         let pending = list.pop().unwrap();
                         task_list.push(pending.id);
 
-                        // We add the number of documents to the count if we are scheduling document additions and
-                        // stop adding if we already have enough.
-                        //
-                        // We check that bound only after adding the current task to the batch, so that a batch contains at least one task.
+                        // We add the number of documents to the count if we are scheduling document additions.
                         match pending.kind {
                             TaskType::DocumentUpdate { number }
                             | TaskType::DocumentAddition { number } => {
                                 doc_count += number;
-
-                                if doc_count
-                                    >= config.max_documents_per_batch.unwrap_or(usize::MAX)
-                                {
-                                    break;
-                                }
                             }
                             _ => (),
                         }
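The net effect in make_batch is that the max_batch_size / max_documents_per_batch bounds are replaced by a single rule: if auto-batching is disabled, a batch never grows past one task; otherwise consecutive pending tasks keep being appended. A self-contained sketch of that rule with simplified types (a plain Vec instead of the real TaskQueue, and task kinds ignored):

/// Simplified stand-in for a pending task id.
type TaskId = u64;

/// Drain pending task ids into one batch, honoring the new rule:
/// with auto-batching disabled the batch holds exactly one task.
fn make_batch(pending: &mut Vec<TaskId>, disable_auto_batching: bool) -> Vec<TaskId> {
    let mut batch = Vec::new();
    while let Some(&next) = pending.first() {
        // We always need at least one task for the scheduler to make progress.
        if disable_auto_batching && !batch.is_empty() {
            break;
        }
        pending.remove(0);
        batch.push(next);
    }
    batch
}

fn main() {
    assert_eq!(make_batch(&mut vec![1, 2, 3], true), vec![1]);
    assert_eq!(make_batch(&mut vec![1, 2, 3], false), vec![1, 2, 3]);
}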
@@ -1,9 +1,7 @@
 use std::sync::Arc;
-use std::time::Duration;
 
 use time::OffsetDateTime;
 use tokio::sync::{watch, RwLock};
-use tokio::time::interval_at;
 
 use super::batch::Batch;
 use super::error::Result;
@@ -17,20 +15,17 @@ pub struct UpdateLoop {
     performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
 
     notifier: Option<watch::Receiver<()>>,
-    debounce_duration: Option<Duration>,
 }
 
 impl UpdateLoop {
     pub fn new(
         scheduler: Arc<RwLock<Scheduler>>,
         performers: Vec<Arc<dyn BatchHandler + Send + Sync + 'static>>,
-        debuf_duration: Option<Duration>,
         notifier: watch::Receiver<()>,
     ) -> Self {
         Self {
             scheduler,
             performers,
-            debounce_duration: debuf_duration,
             notifier: Some(notifier),
         }
     }
@@ -43,11 +38,6 @@ impl UpdateLoop {
                 break;
             }
 
-            if let Some(t) = self.debounce_duration {
-                let mut interval = interval_at(tokio::time::Instant::now() + t, t);
-                interval.tick().await;
-            };
-
             if let Err(e) = self.process_next_batch().await {
                 log::error!("an error occurred while processing an update batch: {}", e);
             }
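For clarity about what was dropped here: the deleted block built a tokio interval whose first tick fires the debounce duration after "now" and awaited that single tick, i.e. it simply paused before processing a batch. Removing it means the loop processes the next batch as soon as it is notified. A small sketch of the equivalence (illustrative only, not code from this commit):

use std::time::Duration;
use tokio::time::{interval_at, sleep, Instant};

#[tokio::main]
async fn main() {
    let t = Duration::from_millis(50);

    // What the removed debounce did: an interval starting `t` from now,
    // awaited once, pauses the task for `t`.
    let mut interval = interval_at(Instant::now() + t, t);
    interval.tick().await;

    // An equivalent, simpler way to express the same pause.
    sleep(t).await;
}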