diff --git a/.github/workflows/trigger-benchmarks-on-message.yml b/.github/workflows/benchmarks-pr.yml
similarity index 100%
rename from .github/workflows/trigger-benchmarks-on-message.yml
rename to .github/workflows/benchmarks-pr.yml
diff --git a/Cargo.lock b/Cargo.lock
index 89f3561bc..a6b22871f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2564,7 +2564,6 @@ dependencies = [
"platform-dirs",
"prometheus",
"puffin",
- "puffin_http",
"rand",
"rayon",
"regex",
@@ -3236,18 +3235,6 @@ dependencies = [
"serde",
]
-[[package]]
-name = "puffin_http"
-version = "0.13.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
-dependencies = [
- "anyhow",
- "crossbeam-channel",
- "log",
- "puffin",
-]
-
[[package]]
name = "quote"
version = "1.0.32"
diff --git a/PROFILING.md b/PROFILING.md
index 4ae21f2f5..daf46bbc2 100644
--- a/PROFILING.md
+++ b/PROFILING.md
@@ -1,14 +1,14 @@
# Profiling Meilisearch
-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options in [Puffin Viewer](https://github.com/embarkstudios/puffin#ui).
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
-When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.
-Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
+[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
diff --git a/README.md b/README.md
index 88621729d..cb9475dea 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,12 @@
⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍
+---
+
+### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
+
+---
+
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index ccdcbcbb6..48eae0063 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -19,6 +19,7 @@ one indexing operation.
use std::collections::{BTreeSet, HashSet};
use std::ffi::OsStr;
+use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
@@ -199,6 +200,29 @@ impl Batch {
}
}
+impl fmt::Display for Batch {
+ /// A human-readable text used when debugging the profiling reports.
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ let index_uid = self.index_uid();
+ let tasks = self.ids();
+ match self {
+ Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
+ Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
+ Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
+ Batch::Dump(_) => f.write_str("Dump")?,
+ Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
+ Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
+ Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
+ Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
+ Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
+ };
+ match index_uid {
+ Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
+ None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
+ }
+ }
+}
+
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
@@ -213,6 +237,30 @@ impl IndexOperation {
}
}
+impl fmt::Display for IndexOperation {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ IndexOperation::DocumentOperation { .. } => {
+ f.write_str("IndexOperation::DocumentOperation")
+ }
+ IndexOperation::DocumentDeletion { .. } => {
+ f.write_str("IndexOperation::DocumentDeletion")
+ }
+ IndexOperation::IndexDocumentDeletionByFilter { .. } => {
+ f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
+ }
+ IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
+ IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
+ IndexOperation::DocumentClearAndSetting { .. } => {
+ f.write_str("IndexOperation::DocumentClearAndSetting")
+ }
+ IndexOperation::SettingsAndDocumentOperation { .. } => {
+ f.write_str("IndexOperation::SettingsAndDocumentOperation")
+ }
+ }
+ }
+}
+
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
@@ -581,7 +629,7 @@ impl IndexScheduler {
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
- puffin::profile_function!(format!("{:?}", batch));
+ puffin::profile_function!(batch.to_string());
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs
index 442a43320..a9d242619 100644
--- a/index-scheduler/src/features.rs
+++ b/index-scheduler/src/features.rs
@@ -47,7 +47,7 @@ impl RoFeatures {
Err(FeatureNotEnabledError {
disabled_action: "Getting metrics",
feature: "metrics",
- issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
+ issue_link: "https://github.com/meilisearch/product/discussions/625",
}
.into())
}
@@ -65,6 +65,19 @@ impl RoFeatures {
.into())
}
}
+
+ pub fn check_puffin(&self) -> Result<()> {
+ if self.runtime.export_puffin_reports {
+ Ok(())
+ } else {
+ Err(FeatureNotEnabledError {
+ disabled_action: "Outputting Puffin reports to disk",
+ feature: "export puffin reports",
+ issue_link: "https://github.com/meilisearch/product/discussions/693",
+ }
+ .into())
+ }
+ }
}
impl FeatureData {
diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index afcfdb270..f820ce99d 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
index_mapper,
features: _,
max_number_of_tasks: _,
+ puffin_frame: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index df87d868d..0b3a5d58a 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -33,6 +33,7 @@ pub type Result = std::result::Result;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
+use std::fs::File;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+use puffin::FrameView;
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
@@ -314,6 +316,9 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
+ /// A frame view used to output the indexing profiling files to disk.
+ pub(crate) puffin_frame: Arc,
+
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
@@ -364,6 +369,7 @@ impl IndexScheduler {
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_tasks: self.max_number_of_tasks,
+ puffin_frame: self.puffin_frame.clone(),
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
@@ -457,6 +463,7 @@ impl IndexScheduler {
env,
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
+ puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
autobatching_enabled: options.autobatching_enabled,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
@@ -572,17 +579,52 @@ impl IndexScheduler {
run.wake_up.wait();
loop {
+ let puffin_enabled = match run.features() {
+ Ok(features) => features.check_puffin().is_ok(),
+ Err(e) => {
+ log::error!("{e}");
+ continue;
+ }
+ };
+ puffin::set_scopes_on(puffin_enabled);
+ puffin::GlobalProfiler::lock().new_frame();
+
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
- log::error!("{}", e);
+ log::error!("{e}");
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
}
+
+ // Let's write the previous frame to disk but only if
+ // the user wanted to profile with puffin.
+ if puffin_enabled {
+ let mut frame_view = run.puffin_frame.lock();
+ if !frame_view.is_empty() {
+ let now = OffsetDateTime::now_utc();
+ let mut file = match File::create(format!("{}.puffin", now)) {
+ Ok(file) => file,
+ Err(e) => {
+ log::error!("{e}");
+ continue;
+ }
+ };
+ if let Err(e) = frame_view.save_to_writer(&mut file) {
+ log::error!("{e}");
+ }
+ if let Err(e) = file.sync_all() {
+ log::error!("{e}");
+ }
+ // We erase this frame view as it is no longer useful. We want to
+ // measure the new frames now that we exported the previous ones.
+ *frame_view = FrameView::default();
+ }
+ }
}
})
.unwrap();
@@ -1062,8 +1104,6 @@ impl IndexScheduler {
self.breakpoint(Breakpoint::Start);
}
- puffin::GlobalProfiler::lock().new_frame();
-
self.cleanup_task_queue()?;
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs
index 4fe4affd4..33afe2d24 100644
--- a/meilisearch-types/src/features.rs
+++ b/meilisearch-types/src/features.rs
@@ -6,6 +6,7 @@ pub struct RuntimeTogglableFeatures {
pub score_details: bool,
pub vector_store: bool,
pub metrics: bool,
+ pub export_puffin_reports: bool,
}
#[derive(Default, Debug, Clone, Copy)]
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index a2536a52f..35a4a4304 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
-puffin = "0.16.0"
-puffin_http = { version = "0.13.0", optional = true }
+puffin = { version = "0.16.0", features = ["serialization"] }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
-profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index a3905d451..246d62c3b 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
- #[cfg(feature = "profile-with-puffin")]
- let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
- puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
-
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs
index 8e35acc56..4437f602d 100644
--- a/meilisearch/src/routes/features.rs
+++ b/meilisearch/src/routes/features.rs
@@ -46,6 +46,8 @@ pub struct RuntimeTogglableFeatures {
pub vector_store: Option,
#[deserr(default)]
pub metrics: Option,
+ #[deserr(default)]
+ pub export_puffin_reports: Option,
}
async fn patch_features(
@@ -60,11 +62,14 @@ async fn patch_features(
let features = index_scheduler.features()?;
let old_features = features.runtime_features();
-
let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
+ export_puffin_reports: new_features
+ .0
+ .export_puffin_reports
+ .unwrap_or(old_features.export_puffin_reports),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@@ -74,6 +79,7 @@ async fn patch_features(
score_details,
vector_store,
metrics,
+ export_puffin_reports,
} = new_features;
analytics.publish(
@@ -82,6 +88,7 @@ async fn patch_features(
"score_details": score_details,
"vector_store": vector_store,
"metrics": metrics,
+ "export_puffin_reports": export_puffin_reports,
}),
Some(&req),
);
diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs
index 9de829d50..8ac73c097 100644
--- a/meilisearch/tests/features/mod.rs
+++ b/meilisearch/tests/features/mod.rs
@@ -20,7 +20,8 @@ async fn experimental_features() {
{
"scoreDetails": false,
"vectorStore": false,
- "metrics": false
+ "metrics": false,
+ "exportPuffinReports": false
}
"###);
@@ -31,7 +32,8 @@ async fn experimental_features() {
{
"scoreDetails": false,
"vectorStore": true,
- "metrics": false
+ "metrics": false,
+ "exportPuffinReports": false
}
"###);
@@ -42,7 +44,8 @@ async fn experimental_features() {
{
"scoreDetails": false,
"vectorStore": true,
- "metrics": false
+ "metrics": false,
+ "exportPuffinReports": false
}
"###);
@@ -54,7 +57,8 @@ async fn experimental_features() {
{
"scoreDetails": false,
"vectorStore": true,
- "metrics": false
+ "metrics": false,
+ "exportPuffinReports": false
}
"###);
@@ -66,7 +70,8 @@ async fn experimental_features() {
{
"scoreDetails": false,
"vectorStore": true,
- "metrics": false
+ "metrics": false,
+ "exportPuffinReports": false
}
"###);
}
@@ -85,7 +90,8 @@ async fn experimental_feature_metrics() {
{
"scoreDetails": false,
"vectorStore": false,
- "metrics": true
+ "metrics": true,
+ "exportPuffinReports": false
}
"###);
@@ -105,7 +111,7 @@ async fn experimental_feature_metrics() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
- "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/meilisearch/discussions/3518",
+ "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@@ -132,7 +138,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
- "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`",
+ "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs
index e23714530..164ad0c7e 100644
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
self.delete_document(docid);
Some(docid)
}
- pub fn execute(self) -> Result {
- puffin::profile_function!();
+ pub fn execute(self) -> Result {
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
self.execute_inner()?;
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
}
+
pub(crate) fn execute_inner(mut self) -> Result {
+ puffin::profile_function!();
+
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
// We retrieve the current documents ids that are in the database.
@@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
+ for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
{
+ puffin::profile_function!();
+
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;
let previous_len = docids.len();
@@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids(
db: &Database,
to_remove: &RoaringBitmap,
) -> Result>> {
+ puffin::profile_function!();
+
let mut prefixes_to_delete = fst::SetBuilder::memory();
// We iterate over the word prefix docids database and remove the deleted documents ids
@@ -528,6 +534,8 @@ fn remove_from_word_docids(
words_to_keep: &mut BTreeSet,
words_to_remove: &mut BTreeSet,
) -> Result<()> {
+ puffin::profile_function!();
+
// We create an iterator to be able to get the content and delete the word docids.
// It's faster to acquire a cursor to get and delete or put, as we avoid traversing
// the LMDB B-Tree two times but only once.
@@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value(
field_id: FieldId,
to_remove: &RoaringBitmap,
) -> heed::Result>> {
+ puffin::profile_function!();
+
let db = match facet_type {
FacetType::String => {
index.field_id_docid_facet_strings.remap_types::()
@@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>(
where
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
{
+ puffin::profile_function!();
+
let mut iter = db.remap_key_type::().iter_mut(wtxn)?;
while let Some(result) = iter.next() {
let (bytes, mut docids) = result?;