diff --git a/.github/workflows/trigger-benchmarks-on-message.yml b/.github/workflows/benchmarks-pr.yml similarity index 100% rename from .github/workflows/trigger-benchmarks-on-message.yml rename to .github/workflows/benchmarks-pr.yml diff --git a/Cargo.lock b/Cargo.lock index 89f3561bc..a6b22871f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2564,7 +2564,6 @@ dependencies = [ "platform-dirs", "prometheus", "puffin", - "puffin_http", "rand", "rayon", "regex", @@ -3236,18 +3235,6 @@ dependencies = [ "serde", ] -[[package]] -name = "puffin_http" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193" -dependencies = [ - "anyhow", - "crossbeam-channel", - "log", - "puffin", -] - [[package]] name = "quote" version = "1.0.32" diff --git a/PROFILING.md b/PROFILING.md index 4ae21f2f5..daf46bbc2 100644 --- a/PROFILING.md +++ b/PROFILING.md @@ -1,14 +1,14 @@ # Profiling Meilisearch -Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options. +Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui). ![An example profiling with Puffin viewer](assets/profiling-example.png) ## Profiling the Indexing Process -When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. 
This server will record a "frame" whenever it executes the `IndexScheduler::tick` method. +When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine creates a "frame" whenever it executes the `IndexScheduler::tick` method. -Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server. +[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing. Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help. diff --git a/README.md b/README.md index 88621729d..cb9475dea 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,12 @@

⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍

+--- + +### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A! + +--- + Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index ccdcbcbb6..48eae0063 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -19,6 +19,7 @@ one indexing operation. use std::collections::{BTreeSet, HashSet}; use std::ffi::OsStr; +use std::fmt; use std::fs::{self, File}; use std::io::BufWriter; @@ -199,6 +200,29 @@ impl Batch { } } +impl fmt::Display for Batch { + /// A text used when we debug the profiling reports. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let index_uid = self.index_uid(); + let tasks = self.ids(); + match self { + Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?, + Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?, + Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?, + Batch::Dump(_) => f.write_str("Dump")?, + Batch::IndexOperation { op, .. } => write!(f, "{op}")?, + Batch::IndexCreation { .. } => f.write_str("IndexCreation")?, + Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?, + Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?, + Batch::IndexSwap { .. } => f.write_str("IndexSwap")?, + }; + match index_uid { + Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")), + None => f.write_fmt(format_args!(" from tasks: {tasks:?}")), + } + } +} + impl IndexOperation { pub fn index_uid(&self) -> &str { match self { @@ -213,6 +237,30 @@ impl IndexOperation { } } +impl fmt::Display for IndexOperation { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IndexOperation::DocumentOperation { .. } => { + f.write_str("IndexOperation::DocumentOperation") + } + IndexOperation::DocumentDeletion { .. } => { + f.write_str("IndexOperation::DocumentDeletion") + } + IndexOperation::IndexDocumentDeletionByFilter { .. } => { + f.write_str("IndexOperation::IndexDocumentDeletionByFilter") + } + IndexOperation::DocumentClear { .. 
} => f.write_str("IndexOperation::DocumentClear"), + IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), + IndexOperation::DocumentClearAndSetting { .. } => { + f.write_str("IndexOperation::DocumentClearAndSetting") + } + IndexOperation::SettingsAndDocumentOperation { .. } => { + f.write_str("IndexOperation::SettingsAndDocumentOperation") + } + } + } +} + impl IndexScheduler { /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. /// @@ -581,7 +629,7 @@ impl IndexScheduler { self.breakpoint(crate::Breakpoint::InsideProcessBatch); } - puffin::profile_function!(format!("{:?}", batch)); + puffin::profile_function!(batch.to_string()); match batch { Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 442a43320..a9d242619 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -47,7 +47,7 @@ impl RoFeatures { Err(FeatureNotEnabledError { disabled_action: "Getting metrics", feature: "metrics", - issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518", + issue_link: "https://github.com/meilisearch/product/discussions/625", } .into()) } @@ -65,6 +65,19 @@ impl RoFeatures { .into()) } } + + pub fn check_puffin(&self) -> Result<()> { + if self.runtime.export_puffin_reports { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "Outputting Puffin reports to disk", + feature: "export puffin reports", + issue_link: "https://github.com/meilisearch/product/discussions/693", + } + .into()) + } + } } impl FeatureData { diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index afcfdb270..f820ce99d 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -30,6 +30,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { index_mapper, features: _, 
max_number_of_tasks: _, + puffin_frame: _, wake_up: _, dumps_path: _, snapshots_path: _, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index df87d868d..0b3a5d58a 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -33,6 +33,7 @@ pub type Result = std::result::Result; pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; +use std::fs::File; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -52,6 +53,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use puffin::FrameView; use roaring::RoaringBitmap; use synchronoise::SignalEvent; use time::format_description::well_known::Rfc3339; @@ -314,6 +316,9 @@ pub struct IndexScheduler { /// the finished tasks automatically. pub(crate) max_number_of_tasks: usize, + /// A frame to output the indexation profiling files to disk. + pub(crate) puffin_frame: Arc, + /// The path used to create the dumps. 
pub(crate) dumps_path: PathBuf, @@ -364,6 +369,7 @@ impl IndexScheduler { wake_up: self.wake_up.clone(), autobatching_enabled: self.autobatching_enabled, max_number_of_tasks: self.max_number_of_tasks, + puffin_frame: self.puffin_frame.clone(), snapshots_path: self.snapshots_path.clone(), dumps_path: self.dumps_path.clone(), auth_path: self.auth_path.clone(), @@ -457,6 +463,7 @@ impl IndexScheduler { env, // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things wake_up: Arc::new(SignalEvent::auto(true)), + puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, max_number_of_tasks: options.max_number_of_tasks, dumps_path: options.dumps_path, @@ -572,17 +579,52 @@ impl IndexScheduler { run.wake_up.wait(); loop { + let puffin_enabled = match run.features() { + Ok(features) => features.check_puffin().is_ok(), + Err(e) => { + log::error!("{e}"); + continue; + } + }; + puffin::set_scopes_on(puffin_enabled); + puffin::GlobalProfiler::lock().new_frame(); + match run.tick() { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Err(e) => { - log::error!("{}", e); + log::error!("{e}"); // Wait one second when an irrecoverable error occurs. if !e.is_recoverable() { std::thread::sleep(Duration::from_secs(1)); } } } + + // Let's write the previous frame to disk but only if + // the user wanted to profile with puffin. + if puffin_enabled { + let mut frame_view = run.puffin_frame.lock(); + if !frame_view.is_empty() { + let now = OffsetDateTime::now_utc(); + let mut file = match File::create(format!("{}.puffin", now)) { + Ok(file) => file, + Err(e) => { + log::error!("{e}"); + continue; + } + }; + if let Err(e) = frame_view.save_to_writer(&mut file) { + log::error!("{e}"); + } + if let Err(e) = file.sync_all() { + log::error!("{e}"); + } + // We erase this frame view as it is no more useful. 
We want to + // measure the new frames now that we exported the previous ones. + *frame_view = FrameView::default(); + } + } } }) .unwrap(); @@ -1062,8 +1104,6 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } - puffin::GlobalProfiler::lock().new_frame(); - self.cleanup_task_queue()?; let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index 4fe4affd4..33afe2d24 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -6,6 +6,7 @@ pub struct RuntimeTogglableFeatures { pub score_details: bool, pub vector_store: bool, pub metrics: bool, + pub export_puffin_reports: bool, } #[derive(Default, Debug, Clone, Copy)] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index a2536a52f..35a4a4304 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" } pin-project-lite = "0.2.9" platform-dirs = "0.3.0" prometheus = { version = "0.13.3", features = ["process"] } -puffin = "0.16.0" -puffin_http = { version = "0.13.0", optional = true } +puffin = { version = "0.16.0", features = ["serialization"] } rand = "0.8.5" rayon = "1.7.0" regex = "1.7.3" @@ -135,7 +134,6 @@ zip = { version = "0.6.4", optional = true } [features] default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] -profile-with-puffin = ["dep:puffin_http"] mini-dashboard = [ "actix-web-static-files", "static-files", diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index a3905d451..246d62c3b 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -30,10 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; - #[cfg(feature = "profile-with-puffin")] - let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", 
puffin_http::DEFAULT_PORT))?; - puffin::set_scopes_on(cfg!(feature = "profile-with-puffin")); - anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 8e35acc56..4437f602d 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -46,6 +46,8 @@ pub struct RuntimeTogglableFeatures { pub vector_store: Option, #[deserr(default)] pub metrics: Option, + #[deserr(default)] + pub export_puffin_reports: Option, } async fn patch_features( @@ -60,11 +62,14 @@ async fn patch_features( let features = index_scheduler.features()?; let old_features = features.runtime_features(); - let new_features = meilisearch_types::features::RuntimeTogglableFeatures { score_details: new_features.0.score_details.unwrap_or(old_features.score_details), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), + export_puffin_reports: new_features + .0 + .export_puffin_reports + .unwrap_or(old_features.export_puffin_reports), }; // explicitly destructure for analytics rather than using the `Serialize` implementation, because @@ -74,6 +79,7 @@ async fn patch_features( score_details, vector_store, metrics, + export_puffin_reports, } = new_features; analytics.publish( @@ -82,6 +88,7 @@ async fn patch_features( "score_details": score_details, "vector_store": vector_store, "metrics": metrics, + "export_puffin_reports": export_puffin_reports, }), Some(&req), ); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index 9de829d50..8ac73c097 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -20,7 +20,8 @@ async fn experimental_features() { { "scoreDetails": false, "vectorStore": false, - "metrics": false + "metrics": 
false, + "exportPuffinReports": false } "###); @@ -31,7 +32,8 @@ async fn experimental_features() { { "scoreDetails": false, "vectorStore": true, - "metrics": false + "metrics": false, + "exportPuffinReports": false } "###); @@ -42,7 +44,8 @@ async fn experimental_features() { { "scoreDetails": false, "vectorStore": true, - "metrics": false + "metrics": false, + "exportPuffinReports": false } "###); @@ -54,7 +57,8 @@ async fn experimental_features() { { "scoreDetails": false, "vectorStore": true, - "metrics": false + "metrics": false, + "exportPuffinReports": false } "###); @@ -66,7 +70,8 @@ async fn experimental_features() { { "scoreDetails": false, "vectorStore": true, - "metrics": false + "metrics": false, + "exportPuffinReports": false } "###); } @@ -85,7 +90,8 @@ async fn experimental_feature_metrics() { { "scoreDetails": false, "vectorStore": false, - "metrics": true + "metrics": true, + "exportPuffinReports": false } "###); @@ -105,7 +111,7 @@ async fn experimental_feature_metrics() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/meilisearch/discussions/3518", + "message": "Getting metrics requires enabling the `metrics` experimental feature. 
See https://github.com/meilisearch/product/discussions/625", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -132,7 +138,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`", + "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index e23714530..164ad0c7e 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } - pub fn execute(self) -> Result { - puffin::profile_function!(); + pub fn execute(self) -> Result { let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } = self.execute_inner()?; Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) } + pub(crate) fn execute_inner(mut self) -> Result { + puffin::profile_function!(); + self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database. 
@@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { C: for<'a> BytesDecode<'a, DItem = RoaringBitmap> + for<'a> BytesEncode<'a, EItem = RoaringBitmap>, { + puffin::profile_function!(); + while let Some(result) = iter.next() { let (bytes, mut docids) = result?; let previous_len = docids.len(); @@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids( db: &Database, to_remove: &RoaringBitmap, ) -> Result>> { + puffin::profile_function!(); + let mut prefixes_to_delete = fst::SetBuilder::memory(); // We iterate over the word prefix docids database and remove the deleted documents ids @@ -528,6 +534,8 @@ fn remove_from_word_docids( words_to_keep: &mut BTreeSet, words_to_remove: &mut BTreeSet, ) -> Result<()> { + puffin::profile_function!(); + // We create an iterator to be able to get the content and delete the word docids. // It's faster to acquire a cursor to get and delete or put, as we avoid traversing // the LMDB B-Tree two times but only once. @@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value( field_id: FieldId, to_remove: &RoaringBitmap, ) -> heed::Result>> { + puffin::profile_function!(); + let db = match facet_type { FacetType::String => { index.field_id_docid_facet_strings.remap_types::() @@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>( where C: heed::BytesDecode<'a> + heed::BytesEncode<'a>, { + puffin::profile_function!(); + let mut iter = db.remap_key_type::().iter_mut(wtxn)?; while let Some(result) = iter.next() { let (bytes, mut docids) = result?;