From ddc2b7129a432d962a407b8a5f6b3f7ae49b1cb1 Mon Sep 17 00:00:00 2001 From: Elliot Lintz <45725915+Elliot67@users.noreply.github.com> Date: Wed, 24 Jan 2024 22:46:21 +0100 Subject: [PATCH 01/87] fix readme broken links --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 35b4cb97a..8cd396fd2 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f ## ✨ Features - **Search-as-you-type:** find search results in less than 50 milliseconds -- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings +- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling @@ -61,8 +61,6 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. 
-You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features. - ## ⚡ Supercharge your Meilisearch experience Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required. @@ -101,7 +99,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions) - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)! -- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch) +- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=feedback) Thank you for your support! From 9eeb75d501b2ce9fd58ab2ad131059aee3c8dcc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 6 Feb 2024 10:47:04 +0100 Subject: [PATCH 02/87] Clamp the max memory of the grenad sorters to a reasonable maximum --- .../index_documents/helpers/grenad_helpers.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index e1b27baa2..dde03c73d 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -9,6 +9,10 @@ use super::{ClonableMmap, MergeFn}; use crate::update::index_documents::valid_lmdb_key; use crate::Result; +/// This is something reasonable given the fact +/// that there is one grenad sorter by thread. +const MAX_GRENAD_SORTER_USAGE: usize = 200 * 1024 * 1024; // 200 MiB + pub type CursorClonableMmap = io::Cursor; pub fn create_writer( @@ -24,6 +28,9 @@ pub fn create_writer( builder.build(BufWriter::new(file)) } +/// A helper function that creates a grenad sorter +/// with the given parameters. The max memory is +/// clamped to something reasonable. pub fn create_sorter( sort_algorithm: grenad::SortAlgorithm, merge: MergeFn, @@ -41,7 +48,7 @@ pub fn create_sorter( builder.max_nb_chunks(nb_chunks); } if let Some(memory) = max_memory { - builder.dump_threshold(memory); + builder.dump_threshold(memory.min(MAX_GRENAD_SORTER_USAGE)); builder.allow_realloc(false); } builder.sort_algorithm(sort_algorithm); @@ -187,10 +194,15 @@ impl Default for GrenadParameters { impl GrenadParameters { /// This function use the number of threads in the current threadpool to compute the value. + /// /// This should be called inside of a rayon thread pool, - /// Otherwise, it will take the global number of threads. + /// otherwise, it will take the global number of threads. + /// + /// The max memory cannot exceed a given reasonable value. 
pub fn max_memory_by_thread(&self) -> Option { - self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads()) + self.max_memory.map(|max_memory| { + (max_memory / rayon::current_num_threads()).min(MAX_GRENAD_SORTER_USAGE) + }) } } From 05edd85d7572fe544e11948dd85510d9d9520358 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 6 Feb 2024 10:55:27 +0100 Subject: [PATCH 03/87] Stabilize scoreDetails --- index-scheduler/src/features.rs | 13 ------------- meilisearch-types/src/features.rs | 1 - meilisearch/src/routes/features.rs | 5 ----- meilisearch/src/search.rs | 4 ---- meilisearch/tests/dumps/mod.rs | 1 - meilisearch/tests/features/mod.rs | 8 +------- meilisearch/tests/search/hybrid.rs | 1 - meilisearch/tests/search/mod.rs | 30 +++--------------------------- 8 files changed, 4 insertions(+), 59 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index dad5e86f4..c18ab98db 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -30,19 +30,6 @@ impl RoFeatures { self.runtime } - pub fn check_score_details(&self) -> Result<()> { - if self.runtime.score_details { - Ok(()) - } else { - Err(FeatureNotEnabledError { - disabled_action: "Computing score details", - feature: "score details", - issue_link: "https://github.com/meilisearch/product/discussions/674", - } - .into()) - } - } - pub fn check_metrics(&self) -> Result<()> { if self.runtime.metrics { Ok(()) diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index 33afe2d24..d737c618e 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -3,7 +3,6 @@ use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[serde(rename_all = "camelCase", default)] pub struct RuntimeTogglableFeatures { - pub score_details: bool, pub vector_store: bool, pub metrics: bool, pub export_puffin_reports: bool, diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index e7fd8de22..0a7e73ac6 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -40,8 +40,6 @@ async fn get_features( #[derive(Debug, Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct RuntimeTogglableFeatures { - #[deserr(default)] - pub score_details: Option, #[deserr(default)] pub vector_store: Option, #[deserr(default)] @@ -63,7 +61,6 @@ async fn patch_features( let old_features = features.runtime_features(); let new_features = meilisearch_types::features::RuntimeTogglableFeatures { - score_details: new_features.0.score_details.unwrap_or(old_features.score_details), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), export_puffin_reports: new_features @@ -76,7 +73,6 @@ async fn patch_features( // the it renames to camelCase, which we don't want for analytics. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. 
let meilisearch_types::features::RuntimeTogglableFeatures { - score_details, vector_store, metrics, export_puffin_reports, @@ -85,7 +81,6 @@ async fn patch_features( analytics.publish( "Experimental features Updated".to_string(), json!({ - "score_details": score_details, "vector_store": vector_store, "metrics": metrics, "export_puffin_reports": export_puffin_reports, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 551f89216..27de36c6d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -441,10 +441,6 @@ fn prepare_search<'t>( ScoringStrategy::Skip }); - if query.show_ranking_score_details { - features.check_score_details()?; - } - if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid { search.embedder_name(embedder); } diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index fd34268a5..632180aac 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1845,7 +1845,6 @@ async fn import_dump_v6_containing_experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": false, "metrics": false, "exportPuffinReports": false diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index abb006ac8..a8147f111 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -18,7 +18,6 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": false, "metrics": false, "exportPuffinReports": false @@ -30,7 +29,6 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, "exportPuffinReports": false @@ -42,7 +40,6 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, "exportPuffinReports": false @@ -55,7 +52,6 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, "exportPuffinReports": false @@ -68,7 +64,6 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, "exportPuffinReports": false @@ -88,7 +83,6 @@ async fn experimental_feature_metrics() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": false, "metrics": true, "exportPuffinReports": false @@ -146,7 +140,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/search/hybrid.rs 
b/meilisearch/tests/search/hybrid.rs index 6ea9920f6..d3e556ab3 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -13,7 +13,6 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, "exportPuffinReports": false diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 9b7b01029..90098c5b6 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -766,38 +766,14 @@ async fn faceting_max_values_per_facet() { } #[actix_rt::test] -async fn experimental_feature_score_details() { +async fn test_score_details() { let server = Server::new().await; let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(json!(documents), None).await; - index.wait_task(0).await; - - index - .search( - json!({ - "q": "train dragon", - "showRankingScoreDetails": true, - }), - |response, code| { - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Computing score details requires enabling the `score details` experimental feature. See https://github.com/meilisearch/product/discussions/674", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }, - ) - .await; - - let (response, code) = server.set_features(json!({"scoreDetails": true})).await; - meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(response["scoreDetails"], @"true"); + let res = index.add_documents(json!(documents), None).await; + index.wait_task(res.0.uid()).await; index .search( From 29f8300ac7c14b4b7781aa7c11a3f07a384a9984 Mon Sep 17 00:00:00 2001 From: gui machiavelli Date: Tue, 6 Feb 2024 16:49:29 +0100 Subject: [PATCH 04/87] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8cd396fd2..a1c5c2f9d 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions) - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)! -- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=feedback) +- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact) Thank you for your support! 
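Patch 02 above clamps the memory budget of each grenad sorter, and the next patch raises that clamp from 200 MiB to 500 MiB. The sketch below is a minimal standalone model of that divide-then-clamp arithmetic, not code from the repository: the 500 MiB constant and the division by the thread count mirror the diffs, while `global_max_memory`, `num_threads`, and the `main` driver are invented here for illustration.

// Maximum memory one grenad sorter may use; patch 05 raises this from 200 MiB to 500 MiB.
const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024;

// Illustrative model of the per-thread budget from patch 02: the global limit is split
// across the indexing threads, then clamped so a single sorter never exceeds the constant.
// `global_max_memory` and `num_threads` are hypothetical inputs for this sketch.
fn max_memory_by_thread(global_max_memory: Option<usize>, num_threads: usize) -> Option<usize> {
    global_max_memory.map(|memory| (memory / num_threads).min(MAX_GRENAD_SORTER_USAGE))
}

fn main() {
    // 4 GiB shared by 16 threads: each sorter gets 256 MiB, below the clamp.
    assert_eq!(
        max_memory_by_thread(Some(4 * 1024 * 1024 * 1024), 16),
        Some(256 * 1024 * 1024)
    );
    // 16 GiB shared by 8 threads would give 2 GiB each, so the clamp caps it at 500 MiB.
    assert_eq!(
        max_memory_by_thread(Some(16 * 1024 * 1024 * 1024), 8),
        Some(500 * 1024 * 1024)
    );
    // No configured limit stays "no limit": the sorter falls back to grenad's defaults.
    assert_eq!(max_memory_by_thread(None, 8), None);
}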
From 053306c0e7b8b26b72489d5a442c56846c14b83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 7 Feb 2024 11:24:43 +0100 Subject: [PATCH 05/87] Try with 500MiB --- milli/src/update/index_documents/helpers/grenad_helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index dde03c73d..9af9bd019 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -11,7 +11,7 @@ use crate::Result; /// This is something reasonable given the fact /// that there is one grenad sorter by thread. -const MAX_GRENAD_SORTER_USAGE: usize = 200 * 1024 * 1024; // 200 MiB +const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024; // 500 MiB pub type CursorClonableMmap = io::Cursor; From fb705116a6798035c199f48730a027c426933086 Mon Sep 17 00:00:00 2001 From: Gosti Date: Tue, 30 Jan 2024 16:32:57 +0100 Subject: [PATCH 06/87] feat: add new models and ability to override dimensions --- milli/src/vector/openai.rs | 54 ++++++++++++++++++++++++++++++++---- milli/src/vector/settings.rs | 3 ++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 524f83b80..20013d8e8 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -17,6 +17,7 @@ pub struct Embedder { pub struct EmbedderOptions { pub api_key: Option, pub embedding_model: EmbeddingModel, + pub dimensions: Option, } #[derive( @@ -41,34 +42,54 @@ pub enum EmbeddingModel { #[serde(rename = "text-embedding-ada-002")] #[deserr(rename = "text-embedding-ada-002")] TextEmbeddingAda002, + + #[serde(rename = "text-embedding-3-small")] + #[deserr(rename = "text-embedding-3-small")] + TextEmbedding3Small, + + #[serde(rename = "text-embedding-3-large")] + #[deserr(rename = "text-embedding-3-large")] + TextEmbedding3Large, } impl EmbeddingModel { pub fn supported_models() -> &'static [&'static str] { - &["text-embedding-ada-002"] + &["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] } pub fn max_token(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 8191, + EmbeddingModel::TextEmbedding3Large => 8191, + EmbeddingModel::TextEmbedding3Small => 8191, } } pub fn dimensions(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 1536, + + //Default value for the model + EmbeddingModel::TextEmbedding3Large => 1536, + + //Default value for the model + EmbeddingModel::TextEmbedding3Small => 3072, } } pub fn name(&self) -> &'static str { match self { EmbeddingModel::TextEmbeddingAda002 => "text-embedding-ada-002", + EmbeddingModel::TextEmbedding3Large => "text-embedding-3-large", + EmbeddingModel::TextEmbedding3Small => "text-embedding-3-small", } } pub fn from_name(name: &str) -> Option { match name { "text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002), + "text-embedding-3-large" => Some(EmbeddingModel::TextEmbedding3Large), + "text-embedding-3-small" => Some(EmbeddingModel::TextEmbedding3Small), _ => None, } } @@ -78,6 +99,20 @@ impl EmbeddingModel { EmbeddingModel::TextEmbeddingAda002 => { Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) } + EmbeddingModel::TextEmbedding3Large => { + Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) + } + EmbeddingModel::TextEmbedding3Small => { + Some(DistributionShift { current_mean: 0.90, 
current_sigma: 0.08 }) + } + } + } + + pub fn is_optional_dimensions_supported(&self) -> bool { + match self { + EmbeddingModel::TextEmbeddingAda002 => false, + EmbeddingModel::TextEmbedding3Large => true, + EmbeddingModel::TextEmbedding3Small => true, } } } @@ -86,11 +121,11 @@ pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings"; impl EmbedderOptions { pub fn with_default_model(api_key: Option) -> Self { - Self { api_key, embedding_model: Default::default() } + Self { api_key, embedding_model: Default::default(), dimensions: None } } pub fn with_embedding_model(api_key: Option, embedding_model: EmbeddingModel) -> Self { - Self { api_key, embedding_model } + Self { api_key, embedding_model, dimensions: None } } } @@ -237,7 +272,15 @@ impl Embedder { for text in texts { log::trace!("Received prompt: {}", text.as_ref()) } - let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts }; + let request = OpenAiRequest { + model: self.options.embedding_model.name(), + input: texts, + dimension: if self.options.embedding_model.is_optional_dimensions_supported() { + self.options.dimensions.as_ref() + } else { + None + }, + }; let response = client .post(OPENAI_EMBEDDINGS_URL) .json(&request) @@ -366,7 +409,7 @@ impl Embedder { } pub fn dimensions(&self) -> usize { - self.options.embedding_model.dimensions() + self.options.dimensions.unwrap_or_else(|| self.options.embedding_model.dimensions()) } pub fn distribution(&self) -> Option { @@ -431,6 +474,7 @@ impl Retry { struct OpenAiRequest<'a, S: AsRef + serde::Serialize> { model: &'a str, input: &'a [S], + dimension: Option<&'a usize>, } #[derive(Debug, Serialize)] diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 37fb80452..dac129ccd 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -208,6 +208,9 @@ impl From for EmbeddingConfig { if let Some(api_key) = api_key.set() { options.api_key = Some(api_key); } + if let Some(dimensions) = dimensions.set() { + options.dimensions = Some(dimensions); + } this.embedder_options = super::EmbedderOptions::OpenAi(options); } EmbedderSource::HuggingFace => { From 7ae401347868f9ff7046fe68c44e4eb73c887a4f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 10:36:30 +0100 Subject: [PATCH 07/87] Make sure the overriden dimensions are always used when embedding --- milli/src/vector/openai.rs | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 20013d8e8..8712c7894 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -65,14 +65,10 @@ impl EmbeddingModel { } } - pub fn dimensions(&self) -> usize { + pub fn default_dimensions(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 1536, - - //Default value for the model EmbeddingModel::TextEmbedding3Large => 1536, - - //Default value for the model EmbeddingModel::TextEmbedding3Small => 3072, } } @@ -108,7 +104,7 @@ impl EmbeddingModel { } } - pub fn is_optional_dimensions_supported(&self) -> bool { + pub fn supports_overriding_dimensions(&self) -> bool { match self { EmbeddingModel::TextEmbeddingAda002 => false, EmbeddingModel::TextEmbedding3Large => true, @@ -275,7 +271,7 @@ impl Embedder { let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts, - dimension: if self.options.embedding_model.is_optional_dimensions_supported() { + dimension: if 
self.options.embedding_model.supports_overriding_dimensions() { self.options.dimensions.as_ref() } else { None @@ -323,8 +319,7 @@ impl Embedder { } let mut tokens = encoded.as_slice(); - let mut embeddings_for_prompt = - Embeddings::new(self.options.embedding_model.dimensions()); + let mut embeddings_for_prompt = Embeddings::new(self.dimensions()); while tokens.len() > max_token_count { let window = &tokens[..max_token_count]; embeddings_for_prompt.push(self.embed_tokens(window, client).await?).unwrap(); @@ -409,7 +404,11 @@ impl Embedder { } pub fn dimensions(&self) -> usize { - self.options.dimensions.unwrap_or_else(|| self.options.embedding_model.dimensions()) + if self.options.embedding_model.supports_overriding_dimensions() { + self.options.dimensions.unwrap_or(self.options.embedding_model.default_dimensions()) + } else { + self.options.embedding_model.default_dimensions() + } } pub fn distribution(&self) -> Option { From 9ac57500964450f8003e5ac9f70ae49e186a314d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 10:37:59 +0100 Subject: [PATCH 08/87] Retrieve the overriden dimensions from the configuration when fetching settings --- milli/src/vector/settings.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index dac129ccd..6fe8eddc0 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -176,7 +176,7 @@ impl From for EmbeddingSettings { model: Setting::Set(options.embedding_model.name().to_owned()), revision: Setting::NotSet, api_key: options.api_key.map(Setting::Set).unwrap_or_default(), - dimensions: Setting::NotSet, + dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(), document_template: Setting::Set(prompt.template), }, super::EmbedderOptions::UserProvided(options) => Self { From 517f5332d67b30f06056557daa4a8b32d43f9449 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 10:39:19 +0100 Subject: [PATCH 09/87] Allow actually passing `dimensions` for OpenAI source -> make sure the settings change is rejected or the settings task fails when the specified model doesn't support overriding `dimensions` and the passed `dimensions` differs from the model's default dimensions. --- meilisearch-types/src/error.rs | 1 + milli/src/error.rs | 7 +++++++ milli/src/update/settings.rs | 24 +++++++++++++++++++----- milli/src/vector/settings.rs | 24 ++++++++++++++++++++---- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 2182b1836..796eb5713 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -347,6 +347,7 @@ impl ErrorCode for milli::Error { UserError::InvalidFieldForSource { .. } | UserError::MissingFieldForSource { .. } | UserError::InvalidOpenAiModel { .. } + | UserError::InvalidOpenAiModelDimensions { .. } | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) 
=> Code::InvalidSettingsEmbedders, diff --git a/milli/src/error.rs b/milli/src/error.rs index 5a4fbc7f5..9cb984db1 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -227,6 +227,13 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco source_: crate::vector::settings::EmbedderSource, embedder_name: String, }, + #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")] + InvalidOpenAiModelDimensions { + embedder_name: String, + model: &'static str, + dimensions: usize, + expected_dimensions: usize, + }, } impl From for Error { diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index d770bcd74..b8289626b 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -974,6 +974,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { crate::vector::settings::EmbeddingSettings::apply_default_source( &mut setting, ); + crate::vector::settings::EmbeddingSettings::apply_default_openai_model( + &mut setting, + ); let setting = validate_embedding_settings(setting, &name)?; changed = true; new_configs.insert(name, setting); @@ -1132,14 +1135,25 @@ pub fn validate_embedding_settings( match inferred_source { EmbedderSource::OpenAi => { check_unset(&revision, "revision", inferred_source, name)?; - check_unset(&dimensions, "dimensions", inferred_source, name)?; if let Setting::Set(model) = &model { - crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or( - crate::error::UserError::InvalidOpenAiModel { + let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) + .ok_or(crate::error::UserError::InvalidOpenAiModel { embedder_name: name.to_owned(), model: model.clone(), - }, - )?; + })?; + if let Setting::Set(dimensions) = dimensions { + if !model.supports_overriding_dimensions() + && dimensions != model.default_dimensions() + { + return Err(crate::error::UserError::InvalidOpenAiModelDimensions { + embedder_name: name.to_owned(), + model: model.name(), + dimensions, + expected_dimensions: model.default_dimensions(), + } + .into()); + } + } } } EmbedderSource::HuggingFace => { diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 6fe8eddc0..834a1c81d 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,6 +1,7 @@ use deserr::Deserr; use serde::{Deserialize, Serialize}; +use super::openai; use crate::prompt::PromptData; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -82,7 +83,7 @@ impl EmbeddingSettings { Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::REVISION => &[EmbedderSource::HuggingFace], Self::API_KEY => &[EmbedderSource::OpenAi], - Self::DIMENSIONS => &[EmbedderSource::UserProvided], + Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], _other => unreachable!("unknown field"), } @@ -90,9 +91,13 @@ impl EmbeddingSettings { pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { match source { - EmbedderSource::OpenAi => { - &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE] - } + EmbedderSource::OpenAi => &[ + Self::SOURCE, + Self::MODEL, + Self::API_KEY, + Self::DOCUMENT_TEMPLATE, + Self::DIMENSIONS, + ], EmbedderSource::HuggingFace => { &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] } @@ -109,6 +114,17 @@ impl 
EmbeddingSettings { *source = Setting::Set(EmbedderSource::default()) } } + + pub(crate) fn apply_default_openai_model(setting: &mut Setting) { + if let Setting::Set(EmbeddingSettings { + source: Setting::Set(EmbedderSource::OpenAi), + model: model @ (Setting::NotSet | Setting::Reset), + .. + }) = setting + { + *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) + } + } } #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] From 74c180267ef1a32f9ca8943b762edfb78afab669 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 11:03:00 +0100 Subject: [PATCH 10/87] pass dimensions only when defined --- milli/src/vector/openai.rs | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 8712c7894..9deb2e2da 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -271,11 +271,7 @@ impl Embedder { let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts, - dimension: if self.options.embedding_model.supports_overriding_dimensions() { - self.options.dimensions.as_ref() - } else { - None - }, + dimensions: self.overriden_dimensions(), }; let response = client .post(OPENAI_EMBEDDINGS_URL) @@ -360,8 +356,11 @@ impl Embedder { tokens: &[usize], client: &reqwest::Client, ) -> Result { - let request = - OpenAiTokensRequest { model: self.options.embedding_model.name(), input: tokens }; + let request = OpenAiTokensRequest { + model: self.options.embedding_model.name(), + input: tokens, + dimensions: self.overriden_dimensions(), + }; let response = client .post(OPENAI_EMBEDDINGS_URL) .json(&request) @@ -414,6 +413,14 @@ impl Embedder { pub fn distribution(&self) -> Option { self.options.embedding_model.distribution() } + + fn overriden_dimensions(&self) -> Option { + if self.options.embedding_model.supports_overriding_dimensions() { + self.options.dimensions + } else { + None + } + } } // retrying in case of failure @@ -473,13 +480,16 @@ impl Retry { struct OpenAiRequest<'a, S: AsRef + serde::Serialize> { model: &'a str, input: &'a [S], - dimension: Option<&'a usize>, + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(Debug, Serialize)] struct OpenAiTokensRequest<'a> { model: &'a str, input: &'a [usize], + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(Debug, Deserialize)] From 32ee05cceffc4cb3425283be67adc8a94d50824c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 11:48:19 +0100 Subject: [PATCH 11/87] Fix default dimensions for models --- milli/src/vector/openai.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 9deb2e2da..f608f1d12 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -68,8 +68,8 @@ impl EmbeddingModel { pub fn default_dimensions(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 1536, - EmbeddingModel::TextEmbedding3Large => 1536, - EmbeddingModel::TextEmbedding3Small => 3072, + EmbeddingModel::TextEmbedding3Large => 3072, + EmbeddingModel::TextEmbedding3Small => 1536, } } From 88d03c56ab479a1ad0710a368f6735e171017f92 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 11:48:47 +0100 Subject: [PATCH 12/87] Don't accept dimensions of 0 (ever) or dimensions greater than the default dimensions of the model --- meilisearch-types/src/error.rs | 2 ++ 
milli/src/error.rs | 9 +++++++++ milli/src/update/settings.rs | 17 +++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 796eb5713..1b54e77c0 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -348,6 +348,8 @@ impl ErrorCode for milli::Error { | UserError::MissingFieldForSource { .. } | UserError::InvalidOpenAiModel { .. } | UserError::InvalidOpenAiModelDimensions { .. } + | UserError::InvalidOpenAiModelDimensionsMax { .. } + | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, diff --git a/milli/src/error.rs b/milli/src/error.rs index 9cb984db1..1147085dd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -234,6 +234,15 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco dimensions: usize, expected_dimensions: usize, }, + #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")] + InvalidOpenAiModelDimensionsMax { + embedder_name: String, + model: &'static str, + dimensions: usize, + max_dimensions: usize, + }, + #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] + InvalidSettingsDimensions { embedder_name: String }, } impl From for Error { diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index b8289626b..a3ba42119 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1122,6 +1122,14 @@ pub fn validate_embedding_settings( let Setting::Set(settings) = settings else { return Ok(settings) }; let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } = settings; + + if let Some(0) = dimensions.set() { + return Err(crate::error::UserError::InvalidSettingsDimensions { + embedder_name: name.to_owned(), + } + .into()); + } + let Some(inferred_source) = source.set() else { return Ok(Setting::Set(EmbeddingSettings { source, @@ -1153,6 +1161,15 @@ pub fn validate_embedding_settings( } .into()); } + if dimensions > model.default_dimensions() { + return Err(crate::error::UserError::InvalidOpenAiModelDimensionsMax { + embedder_name: name.to_owned(), + model: model.name(), + dimensions, + max_dimensions: model.default_dimensions(), + } + .into()); + } } } } From a1caac9bfbb469256151cf163086b81fecc5817f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 14:22:13 +0100 Subject: [PATCH 13/87] Correct distribution shifts for new models --- milli/src/vector/openai.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index f608f1d12..104decb66 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -96,10 +96,10 @@ impl EmbeddingModel { Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) } EmbeddingModel::TextEmbedding3Large => { - Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) + Some(DistributionShift { current_mean: 0.70, current_sigma: 0.1 }) } EmbeddingModel::TextEmbedding3Small => { - Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) + Some(DistributionShift { current_mean: 0.75, current_sigma: 0.1 }) } } } From 89401d097be56716ecd213a34e83eea61c6a476f Mon Sep 17 
00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 09:40:07 +0100 Subject: [PATCH 14/87] Add tracing-trace --- Cargo.lock | 139 +++++++++- Cargo.toml | 6 +- tracing-trace/.gitignore | 1 + tracing-trace/Cargo.toml | 15 ++ tracing-trace/src/bin/trace-to-firefox.rs | 18 ++ tracing-trace/src/entry.rs | 96 +++++++ tracing-trace/src/error.rs | 19 ++ tracing-trace/src/layer.rs | 152 +++++++++++ tracing-trace/src/lib.rs | 40 +++ tracing-trace/src/main.rs | 133 +++++++++ .../src/processor/firefox_profiler.rs | 255 ++++++++++++++++++ tracing-trace/src/processor/fmt.rs | 128 +++++++++ tracing-trace/src/processor/mod.rs | 2 + 13 files changed, 998 insertions(+), 6 deletions(-) create mode 100644 tracing-trace/.gitignore create mode 100644 tracing-trace/Cargo.toml create mode 100644 tracing-trace/src/bin/trace-to-firefox.rs create mode 100644 tracing-trace/src/entry.rs create mode 100644 tracing-trace/src/error.rs create mode 100644 tracing-trace/src/layer.rs create mode 100644 tracing-trace/src/lib.rs create mode 100644 tracing-trace/src/main.rs create mode 100644 tracing-trace/src/processor/firefox_profiler.rs create mode 100644 tracing-trace/src/processor/fmt.rs create mode 100644 tracing-trace/src/processor/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 3f9171edc..9e2cbbb31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -961,6 +961,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "color-spantrace" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -1286,6 +1298,15 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "deduplicating_array" version = "0.1.5" @@ -1911,6 +1932,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "fxprof-processed-profile" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" +dependencies = [ + "bitflags 2.4.1", + "debugid", + "fxhash", + "serde", + "serde_json", +] + [[package]] name = "gemm" version = "0.17.0" @@ -3916,6 +3950,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-bigint" version = "0.4.3" @@ -4037,6 +4081,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "page_size" version = "0.5.0" @@ -4994,6 +5050,15 @@ dependencies = [ "digest", ] +[[package]] +name = 
"sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -5326,6 +5391,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiktoken-rs" version = "0.5.8" @@ -5554,11 +5629,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -5578,11 +5652,60 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracing-trace" +version = "0.1.0" +dependencies = [ + "color-spantrace", + "fxprof-processed-profile", + "serde", + "serde_json", + "tracing", + "tracing-error", + "tracing-subscriber", ] [[package]] @@ -5758,6 +5881,12 @@ dependencies = [ "serde", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index a0c6c3ac9..7f6a8088e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,16 @@ members = [ "json-depth-checker", "benchmarks", "fuzzers", + "tracing-trace", "xtask", ] [workspace.package] version = "1.6.1" -authors = ["Quentin de Quelen ", "Clément Renault "] +authors = [ + "Quentin de Quelen ", + "Clément Renault ", +] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" readme = "README.md" diff --git a/tracing-trace/.gitignore b/tracing-trace/.gitignore new file mode 100644 index 000000000..ea8c4bf7f --- /dev/null +++ b/tracing-trace/.gitignore @@ -0,0 +1 @@ +/target diff --git a/tracing-trace/Cargo.toml 
b/tracing-trace/Cargo.toml new file mode 100644 index 000000000..9215fdfd0 --- /dev/null +++ b/tracing-trace/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "tracing-trace" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +color-spantrace = "0.2.1" +fxprof-processed-profile = "0.6.0" +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" +tracing = "0.1.40" +tracing-error = "0.2.0" +tracing-subscriber = "0.3.18" diff --git a/tracing-trace/src/bin/trace-to-firefox.rs b/tracing-trace/src/bin/trace-to-firefox.rs new file mode 100644 index 000000000..21adff41d --- /dev/null +++ b/tracing-trace/src/bin/trace-to-firefox.rs @@ -0,0 +1,18 @@ +use std::ffi::OsString; +use std::io::Write; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = + tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch") + .unwrap(); + let mut output_file = OsString::new(); + output_file.push("firefox-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + serde_json::to_writer(&mut output_file, &profile).unwrap(); + output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs new file mode 100644 index 000000000..dd91a2a61 --- /dev/null +++ b/tracing-trace/src/entry.rs @@ -0,0 +1,96 @@ +use std::borrow::Cow; + +use serde::{Deserialize, Serialize}; +use tracing::span::Id as TracingId; + +#[derive(Debug, Serialize, Deserialize)] +pub enum Entry { + /// A code location was accessed for the first time + NewCallsite(NewCallsite), + + /// A new thread was accessed + NewThread(NewThread), + + /// A new call started + NewSpan(NewSpan), + + /// An already in-flight call started doing work. + /// + /// For synchronous functions, open should always be followed immediately by enter, exit and close, + /// but for asynchronous functions, work can suspend (exiting the span without closing it), and then + /// later resume (entering the span again without opening it). + /// + /// The timer for a span only starts when the span is entered. + SpanEnter(SpanEnter), + + /// An in-flight call suspended and paused work. + /// + /// For synchronous functions, exit should always be followed immediately by close, + /// but for asynchronous functions, work can suspend and then later resume. + /// + /// The timer for a span pauses when the span is exited. 
+ SpanExit(SpanExit), + + /// A call ended + SpanClose(SpanClose), +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct SpanId(u64); + +impl From<&TracingId> for SpanId { + fn from(value: &TracingId) -> Self { + Self(value.into_u64()) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewCallsite { + pub call_id: ResourceId, + pub name: Cow<'static, str>, + pub module_path: Option>, + pub file: Option>, + pub line: Option, + pub target: Cow<'static, str>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewThread { + pub thread_id: ResourceId, + pub name: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanEnter { + pub id: SpanId, + pub time: std::time::Duration, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanExit { + pub id: SpanId, + pub time: std::time::Duration, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct NewSpan { + pub id: SpanId, + pub call_id: ResourceId, + pub parent_id: Option, + pub thread_id: ResourceId, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanClose { + pub id: SpanId, + pub time: std::time::Duration, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct ResourceId(pub(crate) usize); + +impl ResourceId { + pub fn to_usize(self) -> usize { + self.0 + } +} diff --git a/tracing-trace/src/error.rs b/tracing-trace/src/error.rs new file mode 100644 index 000000000..cce13f85c --- /dev/null +++ b/tracing-trace/src/error.rs @@ -0,0 +1,19 @@ +#[derive(Debug)] +pub enum Error { + Json(serde_json::Error), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("error de/serializing trace entry:")?; + match self { + Error::Json(error) => std::fmt::Display::fmt(&error, f), + } + } +} + +impl From for Error { + fn from(value: serde_json::Error) -> Self { + Self::Json(value) + } +} diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs new file mode 100644 index 000000000..cbc5cf6b2 --- /dev/null +++ b/tracing-trace/src/layer.rs @@ -0,0 +1,152 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::Write; +use std::ops::ControlFlow; +use std::sync::RwLock; + +use tracing::span::{Attributes, Id as TracingId}; +use tracing::{Metadata, Subscriber}; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +use crate::entry::{ + Entry, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, +}; +use crate::{Error, Trace}; + +/// Layer that measures the time spent in spans. +pub struct TraceLayer { + sender: std::sync::mpsc::Sender, + callsites: RwLock>, + start_time: std::time::Instant, + // TODO: kero add handle to allocator stats here +} + +impl Trace { + pub fn new(writer: W) -> (Self, TraceLayer) { + let (sender, receiver) = std::sync::mpsc::channel(); + let trace = Trace { writer, receiver }; + let layer = TraceLayer { + sender, + callsites: Default::default(), + start_time: std::time::Instant::now(), + }; + (trace, layer) + } + + pub fn receive(&mut self) -> Result, Error> { + let Ok(entry) = self.receiver.recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn write(&mut self, entry: Entry) -> Result<(), Error> { + Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?) 
+ } + + pub fn try_receive(&mut self) -> Result, Error> { + let Ok(entry) = self.receiver.try_recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn flush(&mut self) -> Result<(), std::io::Error> { + self.writer.flush() + } +} + +#[derive(PartialEq, Eq, Hash)] +enum OpaqueIdentifier { + Thread(std::thread::ThreadId), + Call(tracing::callsite::Identifier), +} + +impl TraceLayer { + fn resource_id(&self, opaque: OpaqueIdentifier) -> Option { + self.callsites.read().unwrap().get(&opaque).copied() + } + + fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId { + let mut map = self.callsites.write().unwrap(); + let len = map.len(); + *map.entry(opaque).or_insert(ResourceId(len)) + } + + fn elapsed(&self) -> std::time::Duration { + self.start_time.elapsed() + } + + fn send(&self, entry: Entry) { + // we never care that the other end hanged on us + let _ = self.sender.send(entry); + } + + fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId { + let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite())); + + let module_path = metadata.module_path(); + let file = metadata.file(); + let line = metadata.line(); + let name = metadata.name(); + let target = metadata.target(); + + self.send(Entry::NewCallsite(NewCallsite { + call_id, + module_path: module_path.map(Cow::Borrowed), + file: file.map(Cow::Borrowed), + line, + name: Cow::Borrowed(name), + target: Cow::Borrowed(target), + })); + call_id + } + + fn register_thread(&self) -> ResourceId { + let thread_id = std::thread::current().id(); + let name = std::thread::current().name().map(ToOwned::to_owned); + let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id)); + self.send(Entry::NewThread(NewThread { thread_id, name })); + thread_id + } +} + +impl Layer for TraceLayer +where + S: Subscriber, +{ + fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(attrs.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = attrs + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id })); + } + + fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) { + // TODO kero: add memory here + self.send(Entry::SpanEnter(SpanEnter { id: id.into(), time: self.elapsed() })) + } + + fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) { + // TODO kero: add memory here + self.send(Entry::SpanExit(SpanExit { id: id.into(), time: self.elapsed() })) + } + + fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() })) + } +} diff --git a/tracing-trace/src/lib.rs b/tracing-trace/src/lib.rs new file mode 100644 index 000000000..5e0f46d47 --- /dev/null +++ b/tracing-trace/src/lib.rs @@ -0,0 +1,40 @@ +use std::io::{Read, Write}; + +use entry::Entry; + +pub mod entry; +mod error; +pub mod layer; +pub mod processor; + +pub use error::Error; + +pub struct Trace { + writer: W, + receiver: std::sync::mpsc::Receiver, +} + +pub struct TraceReader { + reader: R, +} + +impl TraceReader { + pub fn new(reader: R) -> Self { + Self { reader 
} + } + + fn read(&mut self) -> Option> { + serde_json::Deserializer::from_reader(&mut self.reader) + .into_iter() + .next() + .map(|res| res.map_err(Into::into)) + } +} + +impl Iterator for TraceReader { + type Item = Result; + + fn next(&mut self) -> Option { + self.read() + } +} diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs new file mode 100644 index 000000000..f9f665861 --- /dev/null +++ b/tracing-trace/src/main.rs @@ -0,0 +1,133 @@ +use tracing::{instrument, Span}; +use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError}; + +#[instrument(level = "trace", target = "profile::indexing")] +fn foo() -> Result<(), TracedError> { + let _ = bar(40, 2); + bar(40, 2) +} + +#[derive(Debug)] +pub enum Error { + XTooBig, +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("x too big") + } +} + +impl std::error::Error for Error {} + +#[instrument(level = "trace", target = "profile::indexing")] +fn bar(x: u32, y: u32) -> Result<(), TracedError> { + let handle_ok = spawn_in_current_scope(move || baz(y)); + let handle = spawn_in_current_scope(move || baz(x + y)); + handle_ok.join().unwrap().and(handle.join().unwrap()) +} + +pub fn spawn_in_current_scope(f: F) -> std::thread::JoinHandle +where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, +{ + let current = Span::current(); + std::thread::spawn(move || { + let span = tracing::trace_span!(parent: ¤t, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty); + if let Some(name) = std::thread::current().name() { + span.record("name", name); + } + span.in_scope(f) + }) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn baz(x: u32) -> Result<(), TracedError> { + if x > 10 { + fibo_recursive(10); + return Err(Error::XTooBig).in_current_span(); + } + Ok(()) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn fibo_recursive(n: u32) -> u32 { + if n == 0 { + return 1; + } + if n == 1 { + return 2; + } + return fibo_recursive(n - 1) - fibo_recursive(n - 2); +} + +use tracing_error::ExtractSpanTrace as _; +use tracing_subscriber::layer::SubscriberExt as _; +use tracing_trace::processor; + +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string(); + let trace = SpanTrace::capture(); + tracing::error!(%info, %trace); +} + +fn main() { + let (mut trace, profiling_layer) = + tracing_trace::Trace::new(std::fs::File::create("trace.json").unwrap()); + + let subscriber = tracing_subscriber::registry() + // any number of other subscriber layers may be added before or + // after the `ErrorLayer`... 
+ .with(ErrorLayer::default()) + .with(profiling_layer) + /*.with( + tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_span_events(FmtSpan::FULL), /*.with_filter( + tracing_subscriber::filter::LevelFilter::from_level(tracing::Level::TRACE).and( + tracing_subscriber::filter::Targets::new() + .with_target("profile", tracing::Level::TRACE) + .not(), + ), + )*/ + )*/; + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + std::panic::set_hook(Box::new(on_panic)); + + let res = foo(); + + if let Err(error) = res { + print_extracted_spantraces(&error) + } + + while trace.try_receive().unwrap().is_continue() {} + + trace.flush().unwrap(); + + let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap()); + + let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap(); + serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap(); +} + +fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) { + let mut error = Some(error); + let mut ind = 0; + + eprintln!("Error:"); + + while let Some(err) = error { + if let Some(spantrace) = err.span_trace() { + eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace)); + } else { + eprintln!("{:>4}: {}", ind, err); + } + + error = err.source(); + ind += 1; + } +} diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs new file mode 100644 index 000000000..d3ac495d8 --- /dev/null +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -0,0 +1,255 @@ +use std::collections::HashMap; + +use fxprof_processed_profile::{ + CategoryPairHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField, + MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, Profile, ProfilerMarker, + ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, +}; +use serde_json::json; + +use crate::entry::{ + Entry, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, +}; +use crate::{Error, TraceReader}; + +pub fn to_firefox_profile( + trace: TraceReader, + app: &str, +) -> Result { + let mut profile = Profile::new( + app, + ReferenceTimestamp::from_millis_since_unix_epoch(0.0), + SamplingInterval::from_nanos(15), + ); + + let mut last_timestamp = Timestamp::from_nanos_since_reference(0); + let main = profile.add_process(app, 0, last_timestamp); + + let mut calls = HashMap::new(); + let mut threads = HashMap::new(); + let mut spans = HashMap::new(); + + let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue); + let subcategory = profile.add_subcategory(category, "subcategory"); + + // TODO kero: add counters profile.add_counters + last_memory_value + + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + let string_handle = profile.intern_string(callsite.name.as_ref()); + calls.insert(callsite.call_id, (callsite, string_handle)); + } + Entry::NewThread(thread) => { + let thread_handle = profile.add_thread( + main, + thread.thread_id.to_usize() as u32, + last_timestamp, + threads.is_empty(), + ); + if let Some(name) = &thread.name { + profile.set_thread_name(thread_handle, name) + } + threads.insert(thread.thread_id, thread_handle); + } + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time }) => { + let (_span, status) = 
spans.get_mut(&id).unwrap(); + + let SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside(time); + + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + /* TODO kero: compute delta and update them + profile.add_counter_sample( + counter, + timestamp, + value_delta, + number_of_operations_delta, + ) + */ + } + Entry::SpanExit(SpanExit { id, time }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside(begin) = status else { + continue; + }; + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + let begin = *begin; + + *status = SpanStatus::Outside; + + let span = *span; + let thread_handle = threads.get(&span.thread_id).unwrap(); + + let frames = make_frames(span, &spans, &calls, subcategory); + + profile.add_sample( + *thread_handle, + to_timestamp(begin), + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + profile.add_sample( + *thread_handle, + to_timestamp(time), + frames.iter().rev().cloned(), + CpuDelta::from_nanos((time - begin).as_nanos() as u64), + 1, + ); + + /* TODO kero: compute delta and update them + profile.add_counter_sample( + counter, + timestamp, + value_delta, + number_of_operations_delta, + ) + */ + + let (callsite, _) = calls.get(&span.call_id).unwrap(); + + let marker = SpanMarker { callsite, span: &span }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Interval( + to_timestamp(begin), + to_timestamp(time), + ), + frames.iter().rev().cloned(), + ) + } + Entry::SpanClose(SpanClose { id, time }) => { + spans.remove(&id); + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + } + } + } + + Ok(profile) +} + +fn to_timestamp(time: std::time::Duration) -> Timestamp { + Timestamp::from_nanos_since_reference(time.as_nanos() as u64) +} + +fn make_frames( + span: NewSpan, + spans: &HashMap, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> Vec { + let mut frames = Vec::new(); + let mut current_span = span; + loop { + let frame = make_frame(current_span, calls, subcategory); + frames.push(frame); + if let Some(parent) = current_span.parent_id { + current_span = spans.get(&parent).unwrap().0; + } else { + break; + } + } + frames +} + +fn make_frame( + span: NewSpan, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> FrameInfo { + let (_, call) = calls.get(&span.call_id).unwrap(); + FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() } +} + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside(std::time::Duration), +} + +struct SpanMarker<'a> { + span: &'a NewSpan, + callsite: &'a NewCallsite, +} + +impl<'a> ProfilerMarker for SpanMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "span"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "span_id", + label: "Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + 
MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.span.id; + let thread_id = self.span.thread_id; + json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "span_id": span_id, + "thread_id": thread_id, + }) + } +} diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs new file mode 100644 index 000000000..a9356ba26 --- /dev/null +++ b/tracing-trace/src/processor/fmt.rs @@ -0,0 +1,128 @@ +use std::collections::HashMap; +use std::io::Read; + +use crate::entry::{ + Entry, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, +}; +use crate::{Error, TraceReader}; + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside(std::time::Duration), +} + +pub fn print_trace(trace: TraceReader) -> Result<(), Error> { + let mut calls = HashMap::new(); + let mut threads = HashMap::new(); + let mut spans = HashMap::new(); + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + calls.insert(callsite.call_id, callsite); + } + Entry::NewThread(NewThread { thread_id, name }) => { + threads.insert(thread_id, name); + } + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside(time); + + let span = *span; + + println!( + "[{}]{}::{} <-", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span) + ); + } + Entry::SpanExit(SpanExit { id, time }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside(begin) = status else { + continue; + }; + let begin = *begin; + + *status = SpanStatus::Outside; + + let span = *span; + + println!( + "[{}]{}::{} -> {}", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_duration(time - begin), + ) + } + Entry::SpanClose(SpanClose { id, time: _ }) => { + spans.remove(&id); + } + } + } + Ok(()) +} + +fn print_thread(threads: &HashMap>, thread_id: ResourceId) -> String { + let thread = threads.get(&thread_id).unwrap(); + let thread = + thread.as_ref().cloned().unwrap_or_else(|| format!("ThreadId({})", thread_id.to_usize())); + thread +} + +fn print_backtrace( + spans: &HashMap, + calls: &HashMap, + span: &NewSpan, +) -> String { + let mut parents = Vec::new(); + let mut current = span.parent_id; + while let Some(current_id) = ¤t { + let (span, _) = spans.get(current_id).unwrap(); + let callsite = calls.get(&span.call_id).unwrap(); + parents.push(callsite.name.clone()); + + current = span.parent_id; + } + + let x: Vec = parents.into_iter().rev().map(|x| 
x.to_string()).collect(); + x.join("::") +} + +fn print_span(calls: &HashMap, span: &NewSpan) -> String { + let callsite = calls.get(&span.call_id).unwrap(); + match (callsite.file.clone(), callsite.line) { + (Some(file), None) => format!("{} ({})", callsite.name, file), + (Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line), + _ => callsite.name.to_string(), + } +} + +fn print_duration(duration: std::time::Duration) -> String { + if duration.as_nanos() < 1000 { + format!("{}ns", duration.as_nanos()) + } else if duration.as_micros() < 1000 { + format!("{}μs", duration.as_micros()) + } else if duration.as_millis() < 1000 { + format!("{}ms", duration.as_millis()) + } else if duration.as_secs() < 120 { + format!("{}s", duration.as_secs()) + } else if duration.as_secs_f64() / 60.0 < 60.0 { + format!("{}min", duration.as_secs_f64() / 60.0) + } else if duration.as_secs_f64() / 3600.0 < 8.0 { + format!("{}h", duration.as_secs_f64() / 3600.0) + } else { + format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0) + } +} diff --git a/tracing-trace/src/processor/mod.rs b/tracing-trace/src/processor/mod.rs new file mode 100644 index 000000000..a84cb3b63 --- /dev/null +++ b/tracing-trace/src/processor/mod.rs @@ -0,0 +1,2 @@ +pub mod firefox_profiler; +pub mod fmt; From 02e6c8a440e2d5b727431bf5c6fb31d886b3f60d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 09:41:59 +0100 Subject: [PATCH 15/87] Add tracing to index-scheduler --- Cargo.lock | 1 + index-scheduler/Cargo.toml | 10 +++++++-- index-scheduler/src/batch.rs | 39 ++++++++++++++++++++++++------------ index-scheduler/src/lib.rs | 2 +- 4 files changed, 36 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9e2cbbb31..ebb8c3072 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2917,6 +2917,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "ureq", "uuid", ] diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 4300bc12c..18e7d78c6 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -30,7 +30,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] } synchronoise = "1.0.1" tempfile = "3.9.0" thiserror = "1.0.56" -time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.31", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } +tracing = "0.1.40" ureq = "2.9.1" uuid = { version = "1.6.1", features = ["serde", "v4"] } @@ -39,4 +45,4 @@ big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.34.0", features = ["json", "redactions"] } meili-snap = { path = "../meili-snap" } -nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} +nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a" } diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 01b0ddc1e..8e2eb26a0 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -24,7 +24,6 @@ use std::fs::{self, File}; use std::io::BufWriter; use dump::IndexMetadata; -use log::{debug, error, info, trace}; use meilisearch_types::error::Code; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; @@ -514,6 +513,7 @@ impl IndexScheduler { /// 3. We get the *next* snapshot to process. /// 4. We get the *next* dump to process. /// 5. 
We get the *next* tasks to process for a specific index. + #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result> { #[cfg(test)] self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; @@ -619,6 +619,7 @@ impl IndexScheduler { /// The list of tasks that were processed. The metadata of each task in the returned /// list is updated accordingly, with the exception of the its date fields /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). + #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] pub(crate) fn process_batch(&self, batch: Batch) -> Result> { #[cfg(test)] { @@ -668,9 +669,10 @@ impl IndexScheduler { Ok(()) => { for content_uuid in canceled_tasks_content_uuids { if let Err(error) = self.delete_update_file(content_uuid) { - error!( - "We failed deleting the content file indentified as {}: {}", - content_uuid, error + tracing::error!( + file_content_uuid = %content_uuid, + %error, + "Failed deleting content file" ) } } @@ -969,7 +971,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(tasks) @@ -997,7 +1002,7 @@ impl IndexScheduler { builder.set_primary_key(primary_key); let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; index_wtxn.commit()?; @@ -1024,7 +1029,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(vec![task]) @@ -1143,6 +1151,11 @@ impl IndexScheduler { /// /// ## Return /// The list of processed tasks. + #[tracing::instrument( + level = "trace", + skip(self, index_wtxn, index), + target = "indexing::scheduler" + )] fn apply_index_operation<'i>( &self, index_wtxn: &mut RwTxn<'i>, @@ -1203,7 +1216,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.set_primary_key(primary_key); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; primary_key_has_been_set = true; @@ -1222,7 +1235,7 @@ impl IndexScheduler { index, indexer_config, config, - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(?indexing_step, "Update"), || must_stop_processing.get(), )?; @@ -1294,7 +1307,7 @@ impl IndexScheduler { if !tasks.iter().all(|res| res.error.is_some()) { let addition = builder.execute()?; - info!("document addition done: {:?}", addition); + tracing::info!(indexing_result = ?addition, "document indexing done"); } else if primary_key_has_been_set { // Everything failed but we've set a primary key. // We need to remove it. 
@@ -1302,7 +1315,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.reset_primary_key(); builder.execute( - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; } @@ -1372,7 +1385,7 @@ impl IndexScheduler { let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; @@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>( index, indexer_config, config, - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 946a2a33e..7f66c9427 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1193,7 +1193,7 @@ impl IndexScheduler { log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } } - log::info!("A batch of tasks was successfully completed."); + tracing::info!("A batch of tasks was successfully completed."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. Err(Error::Milli(milli::Error::InternalError( From 5d7061682ef7eff9b42a5d1b24ee712cb6f92d01 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 09:42:48 +0100 Subject: [PATCH 16/87] Add tracing to milli --- Cargo.lock | 1 + milli/Cargo.toml | 11 +---- milli/src/documents/reader.rs | 1 + milli/src/update/clear_documents.rs | 6 +++ milli/src/update/index_documents/enrich.rs | 3 ++ .../extract/extract_docid_word_positions.rs | 2 +- .../extract/extract_facet_number_docids.rs | 2 +- .../extract/extract_facet_string_docids.rs | 2 +- .../extract/extract_fid_docid_facet_values.rs | 2 +- .../extract/extract_fid_word_count_docids.rs | 2 +- .../extract/extract_geo_points.rs | 2 +- .../extract/extract_vector_points.rs | 2 +- .../extract/extract_word_docids.rs | 3 +- .../extract_word_pair_proximity_docids.rs | 14 ++++++- .../extract/extract_word_position_docids.rs | 3 +- .../src/update/index_documents/extract/mod.rs | 9 ++++ .../index_documents/helpers/grenad_helpers.rs | 2 + milli/src/update/index_documents/mod.rs | 36 ++++++++++++++-- milli/src/update/index_documents/transform.rs | 9 ++-- .../src/update/index_documents/typed_chunk.rs | 41 +++++++++++++++++++ milli/src/update/settings.rs | 5 +++ milli/src/update/word_prefix_docids.rs | 7 +++- .../src/update/words_prefix_integer_docids.rs | 7 +++- milli/src/update/words_prefixes_fst.rs | 7 +++- 24 files changed, 150 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ebb8c3072..cf28f560b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3840,6 +3840,7 @@ dependencies = [ "time", "tokenizers", "tokio", + "tracing", "uuid", ] diff --git a/milli/Cargo.toml b/milli/Cargo.toml index dc2b992e0..10ad2fb8f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -91,6 +91,7 @@ tiktoken-rs = "0.5.8" liquid = "0.26.4" arroy = "0.2.0" rand = "0.8.5" +tracing = "0.1.40" [dev-dependencies] mimalloc = { version = "0.1.39", default-features = false } @@ -102,15 +103,7 @@ meili-snap = { path = "../meili-snap" } rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = [ - "charabia/chinese", - "charabia/hebrew", - "charabia/japanese", - "charabia/thai", - "charabia/korean", - 
"charabia/greek", - "charabia/khmer", -] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/documents/reader.rs b/milli/src/documents/reader.rs index a8a4c662d..d5eda69d4 100644 --- a/milli/src/documents/reader.rs +++ b/milli/src/documents/reader.rs @@ -25,6 +25,7 @@ impl DocumentsBatchReader { /// /// It first retrieves the index, then moves to the first document. Use the `into_cursor` /// method to iterator over the documents, from the first to the last. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn from_reader(reader: R) -> Result { let reader = grenad::Reader::new(reader)?; let mut cursor = reader.into_cursor()?; diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index a6c7ff2b1..6715939dc 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { ClearDocuments { wtxn, index } } + #[tracing::instrument( + level = "trace", + skip(self), + target = "indexing::documents", + name = "clear_documents" + )] pub fn execute(self) -> Result { puffin::profile_function!(); diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 03eb3f4de..82ea335ae 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result}; /// # Panics /// /// - if reader.is_empty(), this function may panic in some cases +#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn enrich_documents_batch( rtxn: &heed::RoTxn, index: &Index, @@ -143,6 +144,8 @@ pub fn enrich_documents_batch( /// Retrieve the document id after validating it, returning a `UserError` /// if the id is invalid or can't be guessed. +#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document) +target = "indexing::documents")] fn fetch_or_generate_document_id( document: &obkv::KvReader, documents_batch_index: &DocumentsBatchIndex, diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index a6bbf939a..d568154b2 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R /// /// Returns the generated internal documents ids and a grenad reader /// with the list of extracted words from the given chunk of documents. 
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_docid_word_positions( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index f860aacba..33def5abd 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -16,7 +16,7 @@ use crate::Result; /// /// Returns a grenad reader with the list of extracted facet numbers and /// documents ids from the given chunk of docid facet number positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_number_docids( fid_docid_facet_number: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 2ade776c3..d14be7464 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -15,7 +15,7 @@ use crate::{FieldId, Result}; /// /// Returns a grenad reader with the list of extracted facet strings and /// documents ids from the given chunk of docid facet string positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_string_docids( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index b7de1e621..2449e01cd 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -39,7 +39,7 @@ pub struct ExtractedFacetValues { /// Returns the generated grenad reader containing the docid the fid and the orginal value as key /// and the normalized value as value extracted from the given chunk of documents. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 182d0c5d8..305af3630 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30; /// /// Returns a grenad reader with the list of extracted field id word counts /// and documents ids from the given chunk of docid word positions. 
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_word_count_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index b3600e3bc..cfcc021c6 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result}; /// Extracts the geographical coordinates contained in each document under the `_geo` field. /// /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude) -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_geo_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 87181edc2..0bf7333e3 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -67,7 +67,7 @@ impl VectorStateDelta { /// Extracts the embedding vector contained in each document under the `_vectors` field. /// /// Returns the generated grenad reader containing the docid as key associated to the Vec -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 66092821f..f38701dac 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result}; /// /// The first returned reader is the one for normal word_docids, and the second one is for /// exact_word_docids -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -135,6 +135,7 @@ pub fn extract_word_docids( )) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_into_sorter( document_id: DocumentId, fid: FieldId, diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index d364b3e13..82a94ce00 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -19,7 +19,7 @@ use crate::{DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted word pairs proximities and /// documents ids from the given chunk of docid word positions. 
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_pair_proximity_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids( if current_document_id.map_or(false, |id| id != document_id) { puffin::profile_scope!("Document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( current_document_id.unwrap(), &del_word_pair_proximity, @@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids( if let Some(document_id) = current_document_id { puffin::profile_scope!("Final document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( document_id, &del_word_pair_proximity, @@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids( } { puffin::profile_scope!("sorter_into_reader"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader"); + let _entered = span.enter(); + let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index 89b77d140..4bc553d9a 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted words at positions and /// documents ids from the given chunk of docid word positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_position_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -94,6 +94,7 @@ pub fn extract_word_position_docids( Ok(word_position_docids_reader) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_position_into_sorter( document_id: DocumentId, key_buffer: &mut Vec, diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 43e9a36ec..f0fd97965 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result}; /// Extract data for each databases from obkv documents in parallel. /// Send data in grenad file over provided Sender. 
#[allow(clippy::too_many_arguments)] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub(crate) fn data_from_obkv_documents( original_obkv_chunks: impl Iterator>>> + Send, flattened_obkv_chunks: impl Iterator>>> + Send, @@ -257,12 +258,20 @@ fn spawn_extraction_task( M: MergeableReader + FromParallelIterator + Send + 'static, M::Output: Send, { + let current_span = tracing::Span::current(); + rayon::spawn(move || { + let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_multiple_chunks"); + let _entered = child_span.enter(); puffin::profile_scope!("extract_multiple_chunks", name); let chunks: Result = chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect(); + let current_span = tracing::Span::current(); + rayon::spawn(move || match chunks { Ok(chunks) => { + let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "merge_multiple_chunks"); + let _entered = child_span.enter(); debug!("merge {} database", name); puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index e1b27baa2..e30bd0507 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -49,6 +49,7 @@ pub fn create_sorter( builder.build() } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn sorter_into_reader( sorter: grenad::Sorter, indexer: GrenadParameters, @@ -240,6 +241,7 @@ pub fn grenad_obkv_into_chunks( /// Write provided sorter in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn write_sorter_into_database( sorter: Sorter, database: &heed::Database, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 83d2c08e1..cbcde19fc 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -134,6 +134,7 @@ where /// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward. /// /// Returns the number of documents added to the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn add_documents( mut self, reader: DocumentsBatchReader, @@ -179,6 +180,7 @@ where /// Remove a batch of documents from the current builder. /// /// Returns the number of documents deleted from the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( mut self, to_delete: Vec, @@ -214,6 +216,7 @@ where /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. 
+ #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( mut self, to_delete: &RoaringBitmap, @@ -237,7 +240,12 @@ where Ok((self, deleted_documents)) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace" + skip_all, + target = "indexing::documents", + name = "index_documents" + )] pub fn execute(mut self) -> Result { puffin::profile_function!(); @@ -273,7 +281,12 @@ where } /// Returns the total number of documents in the index after the update. - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "profile::indexing::details", + name = "index_documents_raw" + )] pub fn execute_raw(self, output: TransformOutput) -> Result where FP: Fn(UpdateIndexingStep) + Sync, @@ -374,8 +387,12 @@ where let cloned_embedder = self.embedders.clone(); + let current_span = tracing::Span::current(); + // Run extraction pipeline in parallel. pool.install(|| { + let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_and_send_grenad_chunks"); + let _enter = child_span.enter(); puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks let original_chunk_iter = @@ -543,7 +560,12 @@ where Ok(number_of_documents) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_prefix_databases" + )] pub fn execute_prefix_databases( self, word_docids: Option>, @@ -598,6 +620,8 @@ where let del_prefix_fst_words; { + let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs"); + let _entered = span.enter(); puffin::profile_scope!("compute_prefix_diffs"); current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; @@ -722,6 +746,12 @@ where /// Run the word prefix docids update operation. #[allow(clippy::too_many_arguments)] +#[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_word_prefix_docids" +)] fn execute_word_prefix_docids( txn: &mut heed::RwTxn, reader: grenad::Reader>, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index ab8e27edb..e5392092f 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> { }) } - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn read_documents( &mut self, reader: EnrichedDocumentsBatchReader, @@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db, /// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids. /// - If the document to remove was not present in either the db or the transform we do nothing. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( &mut self, mut to_remove: Vec, @@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. 
- #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( &mut self, to_remove: &RoaringBitmap, @@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> { // Flatten a document from the fields ids map contained in self and insert the new // created fields. Returns `None` if the document doesn't need to be flattened. + #[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")] fn flatten_from_fields_ids_map(&mut self, obkv: KvReader) -> Result>> { if obkv .iter() @@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// Generate the `TransformOutput` based on the given sorter that can be generated from any /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// id for the user side and the value must be an obkv where keys are valid fields ids. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")] pub(crate) fn output_from_sorter( self, wtxn: &mut heed::RwTxn, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 58cb9bb02..7db0279ba 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -115,6 +115,7 @@ impl TypedChunk { /// Write typed chunk in the corresponding LMDB database of the provided index. /// Return new documents seen. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] pub(crate) fn write_typed_chunk_into_index( typed_chunk: TypedChunk, index: &Index, @@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index( let mut is_merged_database = false; match typed_chunk { TypedChunk::Documents(obkv_documents_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "documents"); + let _entered = span.enter(); let mut operations: Vec = Default::default(); let mut docids = index.documents_ids(wtxn)?; @@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index( index.put_documents_ids(wtxn, &docids)?; } TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids"); + let _entered = span.enter(); append_entries_into_database( fid_word_count_docids_iter, &index.field_id_word_count_docids, @@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index( exact_word_docids_reader, word_fid_docids_reader, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_docids"); + let _entered = span.enter(); let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?; append_entries_into_database( word_docids_iter.clone(), @@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::WordPositionDocids(word_position_docids_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids"); + let _entered = span.enter(); append_entries_into_database( word_position_docids_iter, &index.word_position_docids, @@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); indexer.execute(wtxn)?; 
is_merged_database = true; } TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter); indexer.execute(wtxn)?; is_merged_database = true; } TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_exists_docids, &index.facet_id_exists_docids, @@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_null_docids, &index.facet_id_is_null_docids, @@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { + let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_empty_docids, &index.facet_id_is_empty_docids, @@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids"); + let _entered = span.enter(); append_entries_into_database( word_pair_proximity_docids_iter, &index.word_pair_proximity_docids, @@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers"); + let _entered = span.enter(); let index_fid_docid_facet_numbers = index.field_id_docid_facet_f64s.remap_types::(); let mut cursor = fid_docid_facet_number.into_cursor()?; @@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings"); + let _entered = span.enter(); let index_fid_docid_facet_strings = index.field_id_docid_facet_strings.remap_types::(); let mut cursor = fid_docid_facet_string.into_cursor()?; @@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::GeoPoints(geo_points) => { + let span = tracing::trace_span!(target: "indexing::write_db", "geo_points"); + let _entered = span.enter(); let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?; @@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index( expected_dimension, embedder_name, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "vector_points"); + let _entered = span.enter(); let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; @@ -483,6 +520,8 @@ pub(crate) fn write_typed_chunk_into_index( log::debug!("Finished vector chunk for {}", embedder_name); } TypedChunk::ScriptLanguageDocids(sl_map) 
=> { + let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids"); + let _entered = span.enter(); for (key, (deletion, addition)) in sl_map { let mut db_key_exists = false; let final_value = match index.script_language_docids.get(wtxn, &key)? { @@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst( /// Write provided entries in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. +#[tracing::instrument(skip_all, target = "indexing::write_db")] fn write_entries_into_database( data: grenad::Reader, database: &heed::Database, @@ -582,6 +622,7 @@ where /// merge_values function is used if an entry already exist in the database. /// All provided entries must be ordered. /// If the index is not empty, write_entries_into_database is called instead. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn append_entries_into_database( data: grenad::Reader, database: &heed::Database, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index a3ba42119..3cad79467 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.embedder_settings = Setting::Reset; } + #[tracing::instrument( + level = "trace" + skip(self, progress_callback, should_abort, old_fields_ids_map), + target = "indexing::documents" + )] fn reindex( &mut self, progress_callback: &FP, diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 544bea224..99c6c815e 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "word_prefix_docids" + )] pub fn execute( self, mut new_word_docids_iter: grenad::ReaderCursor, diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 819cc097b..23a676bc8 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixIntegerDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_integer_docids" + )] pub fn execute( self, new_word_integer_docids: grenad::Reader, diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index f26bf93e5..bb1830727 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { self } - #[logging_timer::time("WordsPrefixesFst::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_fst" + )] pub fn execute(self) -> Result<()> { puffin::profile_function!(); From 7e47cea0c4ef6d31a4d8b5cc8196f100919b9540 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 09:43:48 +0100 Subject: [PATCH 17/87] Add tracing to Meilisearch --- Cargo.lock | 3 +++ meilisearch/Cargo.toml | 3 +++ meilisearch/src/main.rs | 52 +++++++++++++++++++++++++++++++++++------ 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf28f560b..67920b4bc 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -3679,6 +3679,9 @@ dependencies = [ "tokio", "tokio-stream", "toml", + "tracing", + "tracing-subscriber", + "tracing-trace", "url", "urlencoding", "uuid", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 1d7f53229..98588b6c7 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -105,6 +105,9 @@ yaup = "0.2.1" serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.0", features = ["serde"] } +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +tracing-trace = { version = "0.1.0", path = "../tracing-trace" } [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index ddd37bbb6..5a329eee2 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,31 +1,69 @@ use std::env; use std::io::{stderr, Write}; +use std::ops::ControlFlow; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use actix_web::http::KeepAlive; use actix_web::web::Data; use actix_web::HttpServer; +use anyhow::Context; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; +use tracing_subscriber::layer::SubscriberExt as _; +use tracing_subscriber::Layer; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; /// does all the setup before meilisearch is launched fn setup(opt: &Opt) -> anyhow::Result<()> { - let mut log_builder = env_logger::Builder::new(); - let log_filters = format!( - "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn", - opt.log_level - ); - log_builder.parse_filters(&log_filters); + let now = time::OffsetDateTime::now_utc(); + let format = time::format_description::parse("[year]-[month]-[day]_[hour]:[minute]:[second]")?; + let trace_file = format!("{}-indexing-trace.json", now.format(&format)?); - log_builder.init(); + let file = std::fs::File::create(&trace_file) + .with_context(|| format!("could not create trace file at '{}'", trace_file))?; + // TODO kero: Pass the allocator stats to Trace here + let (mut trace, layer) = tracing_trace::Trace::new(file); + + let subscriber = tracing_subscriber::registry() + .with( + tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) + .unwrap(), + ), + ) + .with( + layer.with_filter( + tracing_subscriber::filter::Targets::new() + .with_target("indexing::", tracing::Level::TRACE), + ), + ); + + std::thread::spawn(move || { + loop { + trace.flush().unwrap(); + match trace.receive() { + Ok(ControlFlow::Continue(_)) => continue, + Ok(ControlFlow::Break(_)) => break, + Err(_) => todo!(), + } + } + while trace.try_receive().is_ok() {} + trace.flush().unwrap(); + }); + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); Ok(()) } From 771861599b437eb1986d3ed9a2b4d57834d39dcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 18 Jan 2024 18:14:47 +0100 Subject: [PATCH 18/87] Logging the memory usage over time --- Cargo.lock | 8 ++ meilisearch/Cargo.toml | 3 +- meilisearch/src/main.rs | 8 +- tracing-trace/Cargo.toml | 2 + 
tracing-trace/src/entry.rs | 52 ++++++++++ tracing-trace/src/layer.rs | 38 +++++-- .../src/processor/firefox_profiler.rs | 99 +++++++++++++++---- tracing-trace/src/processor/fmt.rs | 56 ++++++++--- 8 files changed, 222 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67920b4bc..1d149733c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3669,6 +3669,7 @@ dependencies = [ "siphasher 1.0.0", "slice-group-by", "static-files", + "stats_alloc", "sysinfo", "tar", "temp-env", @@ -5229,6 +5230,11 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stats_alloc" +version = "0.1.10" +source = "git+https://github.com/Kerollmops/stats_alloc?branch=stable-const-fn-trait#6f83c52160c7d0550fdf770e1f73d239b0ff9a97" + [[package]] name = "strsim" version = "0.10.0" @@ -5706,8 +5712,10 @@ version = "0.1.0" dependencies = [ "color-spantrace", "fxprof-processed-profile", + "once_cell", "serde", "serde_json", + "stats_alloc", "tracing", "tracing-error", "tracing-subscriber", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 98588b6c7..39e120bfb 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -108,6 +108,7 @@ url = { version = "2.5.0", features = ["serde"] } tracing = "0.1.40" tracing-subscriber = "0.3.18" tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +stats_alloc = { git = "https://github.com/Kerollmops/stats_alloc", branch = "stable-const-fn-trait", optional = true } [dev-dependencies] actix-rt = "2.9.0" @@ -136,7 +137,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] } zip = { version = "0.6.6", optional = true } [features] -default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] +default = ["stats_alloc", "analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] mini-dashboard = [ "actix-web-static-files", diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 5a329eee2..fd3879b36 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -14,12 +14,18 @@ use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; +use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use tracing_subscriber::layer::SubscriberExt as _; use tracing_subscriber::Layer; +#[cfg(not(feature = "stats_alloc"))] #[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; +static ALLOC: MiMalloc = MiMalloc; + +#[cfg(feature = "stats_alloc")] +#[global_allocator] +static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); /// does all the setup before meilisearch is launched fn setup(opt: &Opt) -> anyhow::Result<()> { diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index 9215fdfd0..8cd863368 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -13,3 +13,5 @@ serde_json = "1.0.111" tracing = "0.1.40" tracing-error = "0.2.0" tracing-subscriber = "0.3.18" +stats_alloc = { git = "https://github.com/Kerollmops/stats_alloc", branch = "stable-const-fn-trait" } +once_cell = "1.19.0" diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index dd91a2a61..06ec05258 100644 --- a/tracing-trace/src/entry.rs +++ 
b/tracing-trace/src/entry.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::ops::Sub; use serde::{Deserialize, Serialize}; use tracing::span::Id as TracingId; @@ -64,12 +65,14 @@ pub struct NewThread { pub struct SpanEnter { pub id: SpanId, pub time: std::time::Duration, + pub memory: Option, } #[derive(Clone, Copy, Debug, Serialize, Deserialize)] pub struct SpanExit { pub id: SpanId, pub time: std::time::Duration, + pub memory: Option, } #[derive(Clone, Copy, Debug, Serialize, Deserialize)] @@ -86,6 +89,55 @@ pub struct SpanClose { pub time: std::time::Duration, } +/// A struct with a lot of memory allocation stats akin +/// to the `stats_alloc::Stats` one but implements the +/// `Serialize/Deserialize` serde traits. +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] +pub struct MemoryStats { + pub allocations: usize, + pub deallocations: usize, + pub reallocations: usize, + pub bytes_allocated: usize, + pub bytes_deallocated: usize, + pub bytes_reallocated: isize, +} + +impl From for MemoryStats { + fn from(stats: stats_alloc::Stats) -> Self { + let stats_alloc::Stats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + } = stats; + MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + } + } +} + +impl Sub for MemoryStats { + type Output = Self; + + fn sub(self, other: Self) -> Self::Output { + Self { + allocations: self.allocations - other.allocations, + deallocations: self.deallocations - other.deallocations, + reallocations: self.reallocations - other.reallocations, + bytes_allocated: self.bytes_allocated - other.bytes_allocated, + bytes_deallocated: self.bytes_deallocated - other.bytes_deallocated, + bytes_reallocated: self.bytes_reallocated - other.bytes_reallocated, + } + } +} + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] pub struct ResourceId(pub(crate) usize); diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index cbc5cf6b2..f5464b699 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -1,9 +1,11 @@ +use std::alloc::{GlobalAlloc, System}; use std::borrow::Cow; use std::collections::HashMap; use std::io::Write; use std::ops::ControlFlow; use std::sync::RwLock; +use stats_alloc::StatsAlloc; use tracing::span::{Attributes, Id as TracingId}; use tracing::{Metadata, Subscriber}; use tracing_subscriber::layer::Context; @@ -15,21 +17,37 @@ use crate::entry::{ use crate::{Error, Trace}; /// Layer that measures the time spent in spans. 
-pub struct TraceLayer { +pub struct TraceLayer { sender: std::sync::mpsc::Sender, callsites: RwLock>, start_time: std::time::Instant, - // TODO: kero add handle to allocator stats here + memory_allocator: Option<&'static StatsAlloc>, } impl Trace { - pub fn new(writer: W) -> (Self, TraceLayer) { + pub fn new(writer: W) -> (Self, TraceLayer) { let (sender, receiver) = std::sync::mpsc::channel(); let trace = Trace { writer, receiver }; let layer = TraceLayer { sender, callsites: Default::default(), start_time: std::time::Instant::now(), + memory_allocator: None, + }; + (trace, layer) + } + + pub fn with_stats_alloc( + writer: W, + stats_alloc: &'static StatsAlloc, + ) -> (Self, TraceLayer) { + let (sender, receiver) = std::sync::mpsc::channel(); + let trace = Trace { writer, receiver }; + let layer = TraceLayer { + sender, + callsites: Default::default(), + start_time: std::time::Instant::now(), + memory_allocator: Some(stats_alloc), }; (trace, layer) } @@ -137,13 +155,19 @@ where } fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) { - // TODO kero: add memory here - self.send(Entry::SpanEnter(SpanEnter { id: id.into(), time: self.elapsed() })) + self.send(Entry::SpanEnter(SpanEnter { + id: id.into(), + time: self.elapsed(), + memory: self.memory_allocator.map(|ma| ma.stats().into()), + })) } fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) { - // TODO kero: add memory here - self.send(Entry::SpanExit(SpanExit { id: id.into(), time: self.elapsed() })) + self.send(Entry::SpanExit(SpanExit { + id: id.into(), + time: self.elapsed(), + memory: self.memory_allocator.map(|ma| ma.stats().into()), + })) } fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) { diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index d3ac495d8..ce04d8bcf 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -5,10 +5,11 @@ use fxprof_processed_profile::{ MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, Profile, ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, }; +use once_cell::unsync::Lazy; use serde_json::json; use crate::entry::{ - Entry, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, + Entry, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, }; use crate::{Error, TraceReader}; @@ -33,6 +34,16 @@ pub fn to_firefox_profile( let subcategory = profile.add_subcategory(category, "subcategory"); // TODO kero: add counters profile.add_counters + last_memory_value + let mut current_memory = MemoryStats::default(); + let mut allocations_counter = Lazy::new(|| { + profile.add_counter(main, "mimmalloc", "Memory", "Amount of allocation calls") + }); + let mut deallocations_counter = Lazy::new(|| { + profile.add_counter(main, "mimmalloc", "Memory", "Amount of deallocation calls") + }); + let mut reallocations_counter = Lazy::new(|| { + profile.add_counter(main, "mimmalloc", "Memory", "Amount of reallocation calls") + }); for entry in trace { let entry = entry?; @@ -56,7 +67,7 @@ pub fn to_firefox_profile( Entry::NewSpan(span) => { spans.insert(span.id, (span, SpanStatus::Outside)); } - Entry::SpanEnter(SpanEnter { id, time }) => { + Entry::SpanEnter(SpanEnter { id, time, memory }) => { let (_span, status) = spans.get_mut(&id).unwrap(); let SpanStatus::Outside = status else { @@ -67,16 +78,41 @@ pub fn to_firefox_profile( last_timestamp = 
Timestamp::from_nanos_since_reference(time.as_nanos() as u64); - /* TODO kero: compute delta and update them - profile.add_counter_sample( - counter, - timestamp, - value_delta, - number_of_operations_delta, - ) - */ + if let Some(stats) = memory { + let MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + } = current_memory - stats; + + profile.add_counter_sample( + *allocations_counter, + last_timestamp, + bytes_allocated as f64, + allocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + *deallocations_counter, + last_timestamp, + bytes_deallocated as f64, + deallocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + *reallocations_counter, + last_timestamp, + bytes_reallocated as f64, + reallocations.try_into().unwrap(), + ); + + current_memory = stats; + } } - Entry::SpanExit(SpanExit { id, time }) => { + Entry::SpanExit(SpanExit { id, time, memory }) => { let (span, status) = spans.get_mut(&id).unwrap(); let SpanStatus::Inside(begin) = status else { @@ -108,14 +144,39 @@ pub fn to_firefox_profile( 1, ); - /* TODO kero: compute delta and update them - profile.add_counter_sample( - counter, - timestamp, - value_delta, - number_of_operations_delta, - ) - */ + if let Some(stats) = memory { + let MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + } = current_memory - stats; + + profile.add_counter_sample( + *allocations_counter, + last_timestamp, + bytes_allocated as f64, + allocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + *deallocations_counter, + last_timestamp, + bytes_deallocated as f64, + deallocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + *reallocations_counter, + last_timestamp, + bytes_reallocated as f64, + reallocations.try_into().unwrap(), + ); + + current_memory = stats; + } let (callsite, _) = calls.get(&span.call_id).unwrap(); diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index a9356ba26..8e99752cb 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -2,7 +2,8 @@ use std::collections::HashMap; use std::io::Read; use crate::entry::{ - Entry, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, + Entry, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + SpanExit, SpanId, }; use crate::{Error, TraceReader}; @@ -28,7 +29,7 @@ pub fn print_trace(trace: TraceReader) -> Result<(), Error> { Entry::NewSpan(span) => { spans.insert(span.id, (span, SpanStatus::Outside)); } - Entry::SpanEnter(SpanEnter { id, time }) => { + Entry::SpanEnter(SpanEnter { id, time, memory }) => { let (span, status) = spans.get_mut(&id).unwrap(); let SpanStatus::Outside = status else { @@ -39,14 +40,23 @@ pub fn print_trace(trace: TraceReader) -> Result<(), Error> { let span = *span; - println!( - "[{}]{}::{} <-", - print_thread(&threads, span.thread_id), - print_backtrace(&spans, &calls, &span), - print_span(&calls, &span) - ); + match memory { + Some(stats) => println!( + "[{}]{}::{} ({}) <-", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_memory(stats), + ), + None => println!( + "[{}]{}::{} <-", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + ), + } } - Entry::SpanExit(SpanExit { id, time }) => { + 
Entry::SpanExit(SpanExit { id, time, memory }) => { let (span, status) = spans.get_mut(&id).unwrap(); let SpanStatus::Inside(begin) = status else { @@ -58,13 +68,23 @@ pub fn print_trace(trace: TraceReader) -> Result<(), Error> { let span = *span; - println!( - "[{}]{}::{} -> {}", - print_thread(&threads, span.thread_id), - print_backtrace(&spans, &calls, &span), - print_span(&calls, &span), - print_duration(time - begin), - ) + match memory { + Some(stats) => println!( + "[{}]{}::{} ({}) -> {}", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_memory(stats), + print_duration(time - begin), + ), + None => println!( + "[{}]{}::{} -> {}", + print_thread(&threads, span.thread_id), + print_backtrace(&spans, &calls, &span), + print_span(&calls, &span), + print_duration(time - begin), + ), + } } Entry::SpanClose(SpanClose { id, time: _ }) => { spans.remove(&id); @@ -126,3 +146,7 @@ fn print_duration(duration: std::time::Duration) -> String { format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0) } } + +fn print_memory(memory: MemoryStats) -> String { + // Format only the total allocations in GiB, MiB, KiB, Bytes +} From 6cf703387dfad799b23b79a467dd20af74b47848 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 10:21:50 +0100 Subject: [PATCH 19/87] Format the bytes as human readable bytes Uses the same `byte_unit` version as `meilisearch` --- Cargo.lock | 2 +- tracing-trace/Cargo.toml | 6 ++- .../src/processor/firefox_profiler.rs | 41 ++++++++++++------- tracing-trace/src/processor/fmt.rs | 20 ++++++++- 4 files changed, 52 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1d149733c..8ef99a0cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5710,9 +5710,9 @@ dependencies = [ name = "tracing-trace" version = "0.1.0" dependencies = [ + "byte-unit", "color-spantrace", "fxprof-processed-profile", - "once_cell", "serde", "serde_json", "stats_alloc", diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index 8cd863368..04b7494d4 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -14,4 +14,8 @@ tracing = "0.1.40" tracing-error = "0.2.0" tracing-subscriber = "0.3.18" stats_alloc = { git = "https://github.com/Kerollmops/stats_alloc", branch = "stable-const-fn-trait" } -once_cell = "1.19.0" +byte-unit = { version = "4.0.19", default-features = false, features = [ + "std", + "serde", +] } + diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index ce04d8bcf..84b88aafb 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -5,7 +5,6 @@ use fxprof_processed_profile::{ MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, Profile, ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, }; -use once_cell::unsync::Lazy; use serde_json::json; use crate::entry::{ @@ -33,17 +32,19 @@ pub fn to_firefox_profile( let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue); let subcategory = profile.add_subcategory(category, "subcategory"); - // TODO kero: add counters profile.add_counters + last_memory_value let mut current_memory = MemoryStats::default(); - let mut allocations_counter = Lazy::new(|| { + let init_allocations = |profile: &mut Profile| { profile.add_counter(main, "mimmalloc", "Memory", "Amount of allocation calls") - }); - let mut deallocations_counter = Lazy::new(|| { + }; 
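// Illustrative sketch, not from the patch: the `once_cell` `Lazy` counters are
// dropped here presumably because their init closures would have to capture
// `&mut profile`, which the surrounding loop also borrows mutably. Keeping a
// plain `Option<CounterHandle>` per counter and filling it on first use (see
// the `get_or_insert_with` calls below) avoids that; the underlying pattern is:
fn first_use<T: Copy>(slot: &mut Option<T>, init: impl FnOnce() -> T) -> T {
    // `init` runs only the first time, while the slot is still `None`.
    *slot.get_or_insert_with(init)
}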
+ let init_deallocations = |profile: &mut Profile| { profile.add_counter(main, "mimmalloc", "Memory", "Amount of deallocation calls") - }); - let mut reallocations_counter = Lazy::new(|| { + }; + let init_reallocations = |profile: &mut Profile| { profile.add_counter(main, "mimmalloc", "Memory", "Amount of reallocation calls") - }); + }; + let mut allocations_counter = None; + let mut deallocations_counter = None; + let mut reallocations_counter = None; for entry in trace { let entry = entry?; @@ -88,22 +89,28 @@ pub fn to_firefox_profile( bytes_reallocated, } = current_memory - stats; + let counter = + *allocations_counter.get_or_insert_with(|| init_allocations(&mut profile)); profile.add_counter_sample( - *allocations_counter, + counter, last_timestamp, bytes_allocated as f64, allocations.try_into().unwrap(), ); + let counter = *deallocations_counter + .get_or_insert_with(|| init_deallocations(&mut profile)); profile.add_counter_sample( - *deallocations_counter, + counter, last_timestamp, bytes_deallocated as f64, deallocations.try_into().unwrap(), ); + let counter = *reallocations_counter + .get_or_insert_with(|| init_reallocations(&mut profile)); profile.add_counter_sample( - *reallocations_counter, + counter, last_timestamp, bytes_reallocated as f64, reallocations.try_into().unwrap(), @@ -154,22 +161,28 @@ pub fn to_firefox_profile( bytes_reallocated, } = current_memory - stats; + let counter = + *allocations_counter.get_or_insert_with(|| init_allocations(&mut profile)); profile.add_counter_sample( - *allocations_counter, + counter, last_timestamp, bytes_allocated as f64, allocations.try_into().unwrap(), ); + let counter = *deallocations_counter + .get_or_insert_with(|| init_deallocations(&mut profile)); profile.add_counter_sample( - *deallocations_counter, + counter, last_timestamp, bytes_deallocated as f64, deallocations.try_into().unwrap(), ); + let counter = *reallocations_counter + .get_or_insert_with(|| init_reallocations(&mut profile)); profile.add_counter_sample( - *reallocations_counter, + counter, last_timestamp, bytes_reallocated as f64, reallocations.try_into().unwrap(), diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index 8e99752cb..31fc1d6cb 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -147,6 +147,24 @@ fn print_duration(duration: std::time::Duration) -> String { } } +/// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes. 
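// Illustrative note, not from the patch: with `byte_unit` 4.x,
// `get_appropriate_unit(true)` picks a binary unit (KiB, MiB, GiB, ...) and the
// `{:.2}` precision keeps two decimals, so a typical line produced by the
// function below reads like
//     Allocated 4.50 MiB, Deallocated 1.20 MiB, Reallocated -512.00 KiB
// (the values here are invented for illustration).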
fn print_memory(memory: MemoryStats) -> String { - // Format only the total allocations in GiB, MiB, KiB, Bytes + use byte_unit::Byte; + + let allocated_bytes = Byte::from_bytes(memory.bytes_allocated.try_into().unwrap()); + let deallocated_bytes = Byte::from_bytes(memory.bytes_deallocated.try_into().unwrap()); + + let reallocated_sign = if memory.bytes_reallocated < 0 { "-" } else { "" }; + let reallocated_bytes = + Byte::from_bytes(memory.bytes_reallocated.abs_diff(0).try_into().unwrap()); + + let adjusted_allocated_bytes = allocated_bytes.get_appropriate_unit(true); + let adjusted_deallocated_bytes = deallocated_bytes.get_appropriate_unit(true); + let adjusted_reallocated_bytes = reallocated_bytes.get_appropriate_unit(true); + + format!( + "Allocated {adjusted_allocated_bytes:.2}, \ + Deallocated {adjusted_deallocated_bytes:.2}, \ + Reallocated {reallocated_sign}{adjusted_reallocated_bytes:.2}" + ) } From 83fb2949c304a48190b77de1d187b80ed5e0f087 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 22 Jan 2024 14:32:57 +0100 Subject: [PATCH 20/87] Give the allocator to the tracer when necessary --- meilisearch/src/main.rs | 4 +++- tracing-trace/src/layer.rs | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index fd3879b36..373280e58 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -35,8 +35,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { let file = std::fs::File::create(&trace_file) .with_context(|| format!("could not create trace file at '{}'", trace_file))?; - // TODO kero: Pass the allocator stats to Trace here + #[cfg(not(feature = "stats_alloc"))] let (mut trace, layer) = tracing_trace::Trace::new(file); + #[cfg(feature = "stats_alloc")] + let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); let subscriber = tracing_subscriber::registry() .with( diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index f5464b699..66d54c674 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -83,7 +83,7 @@ enum OpaqueIdentifier { Call(tracing::callsite::Identifier), } -impl TraceLayer { +impl TraceLayer { fn resource_id(&self, opaque: OpaqueIdentifier) -> Option { self.callsites.read().unwrap().get(&opaque).copied() } @@ -132,9 +132,10 @@ impl TraceLayer { } } -impl Layer for TraceLayer +impl Layer for TraceLayer where S: Subscriber, + A: GlobalAlloc, { fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) { let call_id = self From ca8990394e9ac2a7d0fdd5b6c0a5587182efe4cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 22 Jan 2024 14:38:19 +0100 Subject: [PATCH 21/87] Remove the stats_alloc from the default features --- meilisearch/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 39e120bfb..8ed35d542 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -137,7 +137,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] } zip = { version = "0.6.6", optional = true } [features] -default = ["stats_alloc", "analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] +default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] mini-dashboard = [ "actix-web-static-files", From 256538ccb949a11520502f41c3434e4a2228f151 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 11:47:22 
+0100 Subject: [PATCH 22/87] Refactor memory handling and add markers --- tracing-trace/src/entry.rs | 23 +- .../src/processor/firefox_profiler.rs | 257 +++++++++++------- 2 files changed, 168 insertions(+), 112 deletions(-) diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index 06ec05258..d92ff09b8 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -1,5 +1,4 @@ use std::borrow::Cow; -use std::ops::Sub; use serde::{Deserialize, Serialize}; use tracing::span::Id as TracingId; @@ -123,18 +122,16 @@ impl From for MemoryStats { } } -impl Sub for MemoryStats { - type Output = Self; - - fn sub(self, other: Self) -> Self::Output { - Self { - allocations: self.allocations - other.allocations, - deallocations: self.deallocations - other.deallocations, - reallocations: self.reallocations - other.reallocations, - bytes_allocated: self.bytes_allocated - other.bytes_allocated, - bytes_deallocated: self.bytes_deallocated - other.bytes_deallocated, - bytes_reallocated: self.bytes_reallocated - other.bytes_reallocated, - } +impl MemoryStats { + pub fn checked_sub(self, other: Self) -> Option { + Some(Self { + allocations: self.allocations.checked_sub(other.allocations)?, + deallocations: self.deallocations.checked_sub(other.deallocations)?, + reallocations: self.reallocations.checked_sub(other.reallocations)?, + bytes_allocated: self.bytes_allocated.checked_sub(other.bytes_allocated)?, + bytes_deallocated: self.bytes_deallocated.checked_sub(other.bytes_deallocated)?, + bytes_reallocated: self.bytes_reallocated.checked_sub(other.bytes_reallocated)?, + }) } } diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index 84b88aafb..eda9f6467 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -1,9 +1,9 @@ use std::collections::HashMap; use fxprof_processed_profile::{ - CategoryPairHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField, - MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, Profile, ProfilerMarker, - ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, + CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField, + MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile, + ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, }; use serde_json::json; @@ -33,18 +33,8 @@ pub fn to_firefox_profile( let subcategory = profile.add_subcategory(category, "subcategory"); let mut current_memory = MemoryStats::default(); - let init_allocations = |profile: &mut Profile| { - profile.add_counter(main, "mimmalloc", "Memory", "Amount of allocation calls") - }; - let init_deallocations = |profile: &mut Profile| { - profile.add_counter(main, "mimmalloc", "Memory", "Amount of deallocation calls") - }; - let init_reallocations = |profile: &mut Profile| { - profile.add_counter(main, "mimmalloc", "Memory", "Amount of reallocation calls") - }; - let mut allocations_counter = None; - let mut deallocations_counter = None; - let mut reallocations_counter = None; + + let mut memory_counters = None; for entry in trace { let entry = entry?; @@ -75,59 +65,29 @@ pub fn to_firefox_profile( continue; }; - *status = SpanStatus::Inside(time); + *status = SpanStatus::Inside { time, memory }; last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); - if let Some(stats) = memory { - let MemoryStats { - allocations, 
- deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - } = current_memory - stats; - - let counter = - *allocations_counter.get_or_insert_with(|| init_allocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_allocated as f64, - allocations.try_into().unwrap(), - ); - - let counter = *deallocations_counter - .get_or_insert_with(|| init_deallocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_deallocated as f64, - deallocations.try_into().unwrap(), - ); - - let counter = *reallocations_counter - .get_or_insert_with(|| init_reallocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_reallocated as f64, - reallocations.try_into().unwrap(), - ); - - current_memory = stats; - } + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut current_memory, + ); } Entry::SpanExit(SpanExit { id, time, memory }) => { let (span, status) = spans.get_mut(&id).unwrap(); - let SpanStatus::Inside(begin) = status else { + let SpanStatus::Inside { time: begin, memory: begin_memory } = status else { continue; }; last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); let begin = *begin; + let begin_memory = *begin_memory; *status = SpanStatus::Outside; @@ -151,49 +111,20 @@ pub fn to_firefox_profile( 1, ); - if let Some(stats) = memory { - let MemoryStats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - } = current_memory - stats; - - let counter = - *allocations_counter.get_or_insert_with(|| init_allocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_allocated as f64, - allocations.try_into().unwrap(), - ); - - let counter = *deallocations_counter - .get_or_insert_with(|| init_deallocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_deallocated as f64, - deallocations.try_into().unwrap(), - ); - - let counter = *reallocations_counter - .get_or_insert_with(|| init_reallocations(&mut profile)); - profile.add_counter_sample( - counter, - last_timestamp, - bytes_reallocated as f64, - reallocations.try_into().unwrap(), - ); - - current_memory = stats; - } + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut current_memory, + ); let (callsite, _) = calls.get(&span.call_id).unwrap(); - let marker = SpanMarker { callsite, span: &span }; + let memory_delta = + begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin)); + let marker = SpanMarker { callsite, span: &span, memory_delta }; profile.add_marker_with_stack( *thread_handle, @@ -216,6 +147,77 @@ pub fn to_firefox_profile( Ok(profile) } +struct MemoryCounterHandles { + allocations: CounterHandle, + deallocations: CounterHandle, + reallocations: CounterHandle, +} + +impl MemoryCounterHandles { + fn new(profile: &mut Profile, main: ProcessHandle) -> Self { + let allocations = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of allocated memory"); + let deallocations = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of deallocated memory"); + let reallocations = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of reallocated memory"); + Self { allocations, deallocations, reallocations } + } +} + +fn add_memory_samples( + profile: &mut Profile, + main: ProcessHandle, + memory: Option, + last_timestamp: 
Timestamp, + memory_counters: &mut Option, + current_memory: &mut MemoryStats, +) { + let Some(stats) = memory else { + return; + }; + + let Some(MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + }) = stats.checked_sub(*current_memory) + else { + // since spans are recorded out-of-order it is possible they are not always monotonic. + // We ignore spans that made no difference. + return; + }; + + let memory_counters = + memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main)); + + profile.add_counter_sample( + memory_counters.allocations, + last_timestamp, + bytes_allocated as f64, + allocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + memory_counters.deallocations, + last_timestamp, + bytes_deallocated as f64, + deallocations.try_into().unwrap(), + ); + + profile.add_counter_sample( + memory_counters.reallocations, + last_timestamp, + bytes_reallocated as f64, + reallocations.try_into().unwrap(), + ); + + *current_memory = stats; +} + fn to_timestamp(time: std::time::Duration) -> Timestamp { Timestamp::from_nanos_since_reference(time.as_nanos() as u64) } @@ -252,12 +254,13 @@ fn make_frame( #[derive(Debug, Clone, Copy)] enum SpanStatus { Outside, - Inside(std::time::Duration), + Inside { time: std::time::Duration, memory: Option }, } struct SpanMarker<'a> { span: &'a NewSpan, callsite: &'a NewCallsite, + memory_delta: Option, } impl<'a> ProfilerMarker for SpanMarker<'a> { @@ -295,6 +298,42 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { format: MarkerFieldFormat::Integer, searchable: true, }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "allocations", + label: "Number of allocation operations while this function was executing", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "deallocations", + label: "Number of deallocation operations while this function was executing", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "reallocations", + label: "Number of reallocation operations while this function was executing", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "allocated_bytes", + label: "Number of allocated bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "deallocated_bytes", + label: "Number of deallocated bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "reallocated_bytes", + label: "Number of reallocated bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), ]; MarkerSchema { @@ -317,13 +356,33 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { let module_path = self.callsite.module_path.as_deref(); let span_id = self.span.id; let thread_id = self.span.thread_id; - json!({ + + let mut value = json!({ "type": Self::MARKER_TYPE_NAME, "filename": filename, "line": line, "module_path": module_path, "span_id": span_id, "thread_id": thread_id, - }) + }); + + if let Some(MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + }) = self.memory_delta + { + value["allocations"] = json!(allocations); + 
value["deallocations"] = json!(deallocations); + value["reallocations"] = json!(reallocations); + value["bytes_allocated"] = json!(bytes_allocated); + value["bytes_deallocated"] = json!(bytes_deallocated); + value["bytes_reallocated"] = json!(bytes_reallocated); + } + + value } } From cc79cd0b049e123e56f20c0c5f49e73d070d0b7c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 13:03:30 +0100 Subject: [PATCH 23/87] Switch to a single view indicating current usage --- tracing-trace/src/entry.rs | 8 +++ .../src/processor/firefox_profiler.rs | 58 ++++--------------- 2 files changed, 20 insertions(+), 46 deletions(-) diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index d92ff09b8..34b3aaa52 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -133,6 +133,14 @@ impl MemoryStats { bytes_reallocated: self.bytes_reallocated.checked_sub(other.bytes_reallocated)?, }) } + + pub fn usage(&self) -> isize { + (self.bytes_allocated - self.bytes_deallocated) as isize + self.bytes_reallocated + } + + pub fn operations(&self) -> usize { + self.allocations + self.deallocations + self.reallocations + } } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index eda9f6467..dd56939a4 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -148,20 +148,14 @@ pub fn to_firefox_profile( } struct MemoryCounterHandles { - allocations: CounterHandle, - deallocations: CounterHandle, - reallocations: CounterHandle, + usage: CounterHandle, } impl MemoryCounterHandles { fn new(profile: &mut Profile, main: ProcessHandle) -> Self { - let allocations = - profile.add_counter(main, "mimmalloc", "Memory", "Amount of allocated memory"); - let deallocations = - profile.add_counter(main, "mimmalloc", "Memory", "Amount of deallocated memory"); - let reallocations = - profile.add_counter(main, "mimmalloc", "Memory", "Amount of reallocated memory"); - Self { allocations, deallocations, reallocations } + let usage = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of memory currently in use"); + Self { usage } } } @@ -171,51 +165,23 @@ fn add_memory_samples( memory: Option, last_timestamp: Timestamp, memory_counters: &mut Option, - current_memory: &mut MemoryStats, + last_memory: &mut MemoryStats, ) { let Some(stats) = memory else { return; }; - let Some(MemoryStats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - }) = stats.checked_sub(*current_memory) - else { - // since spans are recorded out-of-order it is possible they are not always monotonic. - // We ignore spans that made no difference. 
- return; - }; - let memory_counters = memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main)); profile.add_counter_sample( - memory_counters.allocations, + memory_counters.usage, last_timestamp, - bytes_allocated as f64, - allocations.try_into().unwrap(), + stats.usage() as f64 - last_memory.usage() as f64, + stats.operations().checked_sub(last_memory.operations()).unwrap_or_default() as u32, ); - profile.add_counter_sample( - memory_counters.deallocations, - last_timestamp, - bytes_deallocated as f64, - deallocations.try_into().unwrap(), - ); - - profile.add_counter_sample( - memory_counters.reallocations, - last_timestamp, - bytes_reallocated as f64, - reallocations.try_into().unwrap(), - ); - - *current_memory = stats; + *last_memory = stats; } fn to_timestamp(time: std::time::Duration) -> Timestamp { @@ -378,9 +344,9 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { value["allocations"] = json!(allocations); value["deallocations"] = json!(deallocations); value["reallocations"] = json!(reallocations); - value["bytes_allocated"] = json!(bytes_allocated); - value["bytes_deallocated"] = json!(bytes_deallocated); - value["bytes_reallocated"] = json!(bytes_reallocated); + value["allocated_bytes"] = json!(bytes_allocated); + value["deallocated_bytes"] = json!(bytes_deallocated); + value["reallocated_bytes"] = json!(bytes_reallocated); } value From b141c82a04b9e2c5ca382e904925aa6765878718 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 16:25:05 +0100 Subject: [PATCH 24/87] Support Events in trace layer --- tracing-trace/src/entry.rs | 12 ++ tracing-trace/src/layer.rs | 35 +++- .../src/processor/firefox_profiler.rs | 196 +++++++++++++++++- tracing-trace/src/processor/fmt.rs | 44 +++- 4 files changed, 275 insertions(+), 12 deletions(-) diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index 34b3aaa52..61151b04c 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -33,6 +33,9 @@ pub enum Entry { /// A call ended SpanClose(SpanClose), + + /// An event occurred + Event(Event), } #[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] @@ -74,6 +77,15 @@ pub struct SpanExit { pub memory: Option, } +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct Event { + pub call_id: ResourceId, + pub thread_id: ResourceId, + pub parent_id: Option, + pub time: std::time::Duration, + pub memory: Option, +} + #[derive(Clone, Copy, Debug, Serialize, Deserialize)] pub struct NewSpan { pub id: SpanId, diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index 66d54c674..e3573fdd3 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -12,7 +12,8 @@ use tracing_subscriber::layer::Context; use tracing_subscriber::Layer; use crate::entry::{ - Entry, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, + Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + SpanExit, SpanId, }; use crate::{Error, Trace}; @@ -98,6 +99,10 @@ impl TraceLayer { self.start_time.elapsed() } + fn memory_stats(&self) -> Option { + self.memory_allocator.map(|ma| ma.stats().into()) + } + fn send(&self, entry: Entry) { // we never care that the other end hanged on us let _ = self.sender.send(entry); @@ -159,7 +164,7 @@ where self.send(Entry::SpanEnter(SpanEnter { id: id.into(), time: self.elapsed(), - memory: self.memory_allocator.map(|ma| ma.stats().into()), + memory: self.memory_stats(), })) } @@ -167,7 +172,31 @@ where 
self.send(Entry::SpanExit(SpanExit { id: id.into(), time: self.elapsed(), - memory: self.memory_allocator.map(|ma| ma.stats().into()), + memory: self.memory_stats(), + })) + } + + fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(event.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(event.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = event + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::Event(Event { + call_id, + thread_id, + parent_id, + time: self.elapsed(), + memory: self.memory_stats(), })) } diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index dd56939a4..126b4af1a 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -8,7 +8,8 @@ use fxprof_processed_profile::{ use serde_json::json; use crate::entry::{ - Entry, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, + Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, + SpanId, }; use crate::{Error, TraceReader}; @@ -32,7 +33,7 @@ pub fn to_firefox_profile( let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue); let subcategory = profile.add_subcategory(category, "subcategory"); - let mut current_memory = MemoryStats::default(); + let mut last_memory = MemoryStats::default(); let mut memory_counters = None; @@ -75,7 +76,7 @@ pub fn to_firefox_profile( memory, last_timestamp, &mut memory_counters, - &mut current_memory, + &mut last_memory, ); } Entry::SpanExit(SpanExit { id, time, memory }) => { @@ -117,7 +118,7 @@ pub fn to_firefox_profile( memory, last_timestamp, &mut memory_counters, - &mut current_memory, + &mut last_memory, ); let (callsite, _) = calls.get(&span.call_id).unwrap(); @@ -137,9 +138,58 @@ pub fn to_firefox_profile( frames.iter().rev().cloned(), ) } + Entry::Event(event) => { + let span = event + .parent_id + .as_ref() + .and_then(|parent_id| spans.get(parent_id)) + .and_then(|(span, status)| match status { + SpanStatus::Outside => None, + SpanStatus::Inside { .. 
} => Some(span), + }) + .copied(); + let timestamp = to_timestamp(event.time); + + let thread_handle = threads.get(&event.thread_id).unwrap(); + + let frames = span + .map(|span| make_frames(span, &spans, &calls, subcategory)) + .unwrap_or_default(); + + profile.add_sample( + *thread_handle, + timestamp, + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + + let memory_delta = add_memory_samples( + &mut profile, + main, + event.memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + + let (callsite, _) = calls.get(&event.call_id).unwrap(); + + let marker = EventMarker { callsite, event: &event, memory_delta }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Instant(timestamp), + frames.iter().rev().cloned(), + ); + + last_timestamp = timestamp; + } Entry::SpanClose(SpanClose { id, time }) => { spans.remove(&id); - last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + last_timestamp = to_timestamp(time); } } } @@ -166,9 +216,9 @@ fn add_memory_samples( last_timestamp: Timestamp, memory_counters: &mut Option, last_memory: &mut MemoryStats, -) { +) -> Option { let Some(stats) = memory else { - return; + return None; }; let memory_counters = @@ -181,7 +231,9 @@ fn add_memory_samples( stats.operations().checked_sub(last_memory.operations()).unwrap_or_default() as u32, ); + let delta = stats.checked_sub(*last_memory); *last_memory = stats; + delta } fn to_timestamp(time: std::time::Duration) -> Timestamp { @@ -352,3 +404,133 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { value } } + +struct EventMarker<'a> { + event: &'a Event, + callsite: &'a NewCallsite, + memory_delta: Option, +} + +impl<'a> ProfilerMarker for EventMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "tracing-event"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "parent_span_id", + label: "Parent Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "allocations", + label: "Number of allocation operations since last measure", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "deallocations", + label: "Number of deallocation operations since last measure", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "reallocations", + label: "Number of reallocation operations since last measure", + format: MarkerFieldFormat::Integer, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "allocated_bytes", + label: "Number of allocated bytes since last measure", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: 
"deallocated_bytes", + label: "Number of deallocated bytes since last measure", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "reallocated_bytes", + label: "Number of reallocated bytes since last measure", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.event.parent_id; + let thread_id = self.event.thread_id; + + let mut value = json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "parent_span_id": span_id, + "thread_id": thread_id, + }); + + if let Some(MemoryStats { + allocations, + deallocations, + reallocations, + bytes_allocated, + bytes_deallocated, + bytes_reallocated, + }) = self.memory_delta + { + value["allocations"] = json!(allocations); + value["deallocations"] = json!(deallocations); + value["reallocations"] = json!(reallocations); + value["allocated_bytes"] = json!(bytes_allocated); + value["deallocated_bytes"] = json!(bytes_deallocated); + value["reallocated_bytes"] = json!(bytes_reallocated); + } + + value + } +} diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index 31fc1d6cb..166930dfc 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::io::Read; use crate::entry::{ - Entry, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, }; use crate::{Error, TraceReader}; @@ -89,6 +89,42 @@ pub fn print_trace(trace: TraceReader) -> Result<(), Error> { Entry::SpanClose(SpanClose { id, time: _ }) => { spans.remove(&id); } + Entry::Event(Event { call_id, thread_id, parent_id, time: _, memory }) => { + let parent_span = parent_id.and_then(|parent_id| spans.get(&parent_id)).and_then( + |(span, status)| match status { + SpanStatus::Outside => None, + SpanStatus::Inside(_) => Some(span), + }, + ); + match (parent_span, memory) { + (Some(parent_span), Some(stats)) => println!( + "[{}]{}::{} ({}) event: {}", + print_thread(&threads, thread_id), + print_backtrace(&spans, &calls, parent_span), + print_span(&calls, parent_span), + print_memory(stats), + print_call(&calls, call_id) + ), + (Some(parent_span), None) => println!( + "[{}]{}::{} event: {}", + print_thread(&threads, thread_id), + print_backtrace(&spans, &calls, parent_span), + print_span(&calls, parent_span), + print_call(&calls, call_id) + ), + (None, None) => println!( + "[{}] event: {}", + print_thread(&threads, thread_id), + print_call(&calls, call_id) + ), + (None, Some(stats)) => println!( + "[{}] ({}) event: {}", + print_thread(&threads, thread_id), + print_memory(stats), + print_call(&calls, call_id) + ), + } + } } } Ok(()) @@ -121,7 +157,11 @@ fn print_backtrace( } fn print_span(calls: &HashMap, span: &NewSpan) -> String { - let callsite 
= calls.get(&span.call_id).unwrap(); + print_call(calls, span.call_id) +} + +fn print_call(calls: &HashMap, call_id: ResourceId) -> String { + let callsite = calls.get(&call_id).unwrap(); match (callsite.file.clone(), callsite.line) { (Some(file), None) => format!("{} ({})", callsite.name, file), (Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line), From 8febbf64ceb2173996931ad58ff5e1bb253b740c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 16:52:48 +0100 Subject: [PATCH 25/87] Switch to tokio channel --- Cargo.lock | 1 + tracing-trace/Cargo.toml | 1 + tracing-trace/src/layer.rs | 19 ++++++++++++++----- tracing-trace/src/lib.rs | 2 +- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8ef99a0cc..f3821c94a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5716,6 +5716,7 @@ dependencies = [ "serde", "serde_json", "stats_alloc", + "tokio", "tracing", "tracing-error", "tracing-subscriber", diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index 04b7494d4..da5e2b36c 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -18,4 +18,5 @@ byte-unit = { version = "4.0.19", default-features = false, features = [ "std", "serde", ] } +tokio = { version = "1.35.1", features = ["sync"] } diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index e3573fdd3..1b9aadfa7 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -19,7 +19,7 @@ use crate::{Error, Trace}; /// Layer that measures the time spent in spans. pub struct TraceLayer { - sender: std::sync::mpsc::Sender, + sender: tokio::sync::mpsc::UnboundedSender, callsites: RwLock>, start_time: std::time::Instant, memory_allocator: Option<&'static StatsAlloc>, @@ -27,7 +27,7 @@ pub struct TraceLayer { impl Trace { pub fn new(writer: W) -> (Self, TraceLayer) { - let (sender, receiver) = std::sync::mpsc::channel(); + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let trace = Trace { writer, receiver }; let layer = TraceLayer { sender, @@ -42,7 +42,7 @@ impl Trace { writer: W, stats_alloc: &'static StatsAlloc, ) -> (Self, TraceLayer) { - let (sender, receiver) = std::sync::mpsc::channel(); + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let trace = Trace { writer, receiver }; let layer = TraceLayer { sender, @@ -53,8 +53,17 @@ impl Trace { (trace, layer) } - pub fn receive(&mut self) -> Result, Error> { - let Ok(entry) = self.receiver.recv() else { + pub async fn receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.recv().await else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + /// Panics if called from an asynchronous context + pub fn blocking_receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.blocking_recv() else { return Ok(ControlFlow::Break(())); }; self.write(entry)?; diff --git a/tracing-trace/src/lib.rs b/tracing-trace/src/lib.rs index 5e0f46d47..3d00eef10 100644 --- a/tracing-trace/src/lib.rs +++ b/tracing-trace/src/lib.rs @@ -11,7 +11,7 @@ pub use error::Error; pub struct Trace { writer: W, - receiver: std::sync::mpsc::Receiver, + receiver: tokio::sync::mpsc::UnboundedReceiver, } pub struct TraceReader { From 6e23040464a1db90e757b36d52a13711efc2cdc2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 23 Jan 2024 16:53:05 +0100 Subject: [PATCH 26/87] Use with tokio channel in Meilisearch --- meilisearch/src/main.rs | 12 ++++++------ 1 file changed, 6 
insertions(+), 6 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 373280e58..75308258e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -57,13 +57,13 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { ), ); - std::thread::spawn(move || { + tokio::task::spawn(async move { loop { - trace.flush().unwrap(); - match trace.receive() { - Ok(ControlFlow::Continue(_)) => continue, - Ok(ControlFlow::Break(_)) => break, - Err(_) => todo!(), + match tokio::time::timeout(std::time::Duration::from_secs(1), trace.receive()).await { + Ok(Ok(ControlFlow::Continue(()))) => continue, + Ok(Ok(ControlFlow::Break(_))) => break, + Ok(Err(_)) => todo!(), + Err(_) => trace.flush().unwrap(), } } while trace.try_receive().is_ok() {} From a61d8c59ff198c6ad6081e5f584a90efb01be0b0 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 24 Jan 2024 09:50:51 +0100 Subject: [PATCH 27/87] Add span stats processor --- tracing-trace/src/bin/trace-to-callstats.rs | 16 +++++ tracing-trace/src/processor/mod.rs | 1 + tracing-trace/src/processor/span_stats.rs | 79 +++++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 tracing-trace/src/bin/trace-to-callstats.rs create mode 100644 tracing-trace/src/processor/span_stats.rs diff --git a/tracing-trace/src/bin/trace-to-callstats.rs b/tracing-trace/src/bin/trace-to-callstats.rs new file mode 100644 index 000000000..3644b7bff --- /dev/null +++ b/tracing-trace/src/bin/trace-to-callstats.rs @@ -0,0 +1,16 @@ +use std::ffi::OsString; +use std::io::Write; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap(); + let mut output_file = OsString::new(); + output_file.push("callstats-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + serde_json::to_writer(&mut output_file, &profile).unwrap(); + output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/processor/mod.rs b/tracing-trace/src/processor/mod.rs index a84cb3b63..ea445b0a5 100644 --- a/tracing-trace/src/processor/mod.rs +++ b/tracing-trace/src/processor/mod.rs @@ -1,2 +1,3 @@ pub mod firefox_profiler; pub mod fmt; +pub mod span_stats; diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs new file mode 100644 index 000000000..63b6ae5c1 --- /dev/null +++ b/tracing-trace/src/processor/span_stats.rs @@ -0,0 +1,79 @@ +use std::collections::{BTreeMap, HashMap}; +use std::time::Duration; + +use serde::Serialize; + +use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit}; +use crate::{Error, TraceReader}; + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside(std::time::Duration), +} + +#[derive(Serialize)] +pub struct CallStats { + nb: usize, + ns: u64, +} + +pub fn to_call_stats( + trace: TraceReader, +) -> Result, Error> { + let mut calls = HashMap::new(); + let mut spans = HashMap::new(); + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + calls.insert(callsite.call_id, (callsite, vec![])); + } + Entry::NewThread(_) => {} + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => { + let (_, status) = 
spans.get_mut(&id).unwrap(); + + let SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside(time); + } + Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside(begin) = status else { + continue; + }; + let begin = *begin; + + *status = SpanStatus::Outside; + + let span = *span; + let (_, call_list) = calls.get_mut(&span.call_id).unwrap(); + call_list.push(end - begin); + } + Entry::SpanClose(SpanClose { id, time: _ }) => { + spans.remove(&id); + } + Entry::Event(_) => {} + } + } + + Ok(calls + .into_iter() + .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls))) + .collect()) +} + +fn site_to_string(call_site: NewCallsite) -> String { + format!("{}::{}", call_site.target, call_site.name) +} +fn calls_to_stats(calls: Vec) -> CallStats { + let nb = calls.len(); + let sum: Duration = calls.iter().sum(); + CallStats { nb, ns: sum.as_nanos() as u64 } +} From dedc91e2cf6a9ad5dd312a2c917fc3ad2dac0d83 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 24 Jan 2024 11:16:41 +0100 Subject: [PATCH 28/87] use json lines --- tracing-trace/src/bin/trace-to-callstats.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tracing-trace/src/bin/trace-to-callstats.rs b/tracing-trace/src/bin/trace-to-callstats.rs index 3644b7bff..631cdbb45 100644 --- a/tracing-trace/src/bin/trace-to-callstats.rs +++ b/tracing-trace/src/bin/trace-to-callstats.rs @@ -1,6 +1,8 @@ use std::ffi::OsString; use std::io::Write; +use serde_json::json; + fn main() { let input_file = std::env::args_os().nth(1).expect("missing file"); let input = @@ -11,6 +13,9 @@ fn main() { output_file.push("callstats-"); output_file.push(input_file); let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); - serde_json::to_writer(&mut output_file, &profile).unwrap(); + for (key, value) in profile { + serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap(); + writeln!(&mut output_file).unwrap(); + } output_file.flush().unwrap(); } From 2f1abd2c034bafa3b0a4cdfeaebcb93cb843f7ba Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 24 Jan 2024 15:13:34 +0100 Subject: [PATCH 29/87] nelson is not used anymore --- Cargo.lock | 6 ------ index-scheduler/Cargo.toml | 1 - 2 files changed, 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f3821c94a..866912cc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2907,7 +2907,6 @@ dependencies = [ "meili-snap", "meilisearch-auth", "meilisearch-types", - "nelson", "page_size 0.5.0", "puffin", "roaring", @@ -3921,11 +3920,6 @@ dependencies = [ "syn 2.0.48", ] -[[package]] -name = "nelson" -version = "0.1.0" -source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a" - [[package]] name = "nom" version = "7.1.3" diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 18e7d78c6..4e7fd1b64 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -45,4 +45,3 @@ big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.34.0", features = ["json", "redactions"] } meili-snap = { path = "../meili-snap" } -nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a" } From f76cc0806eab9d3a0f5ac6660c17ae1c90d164fc Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 25 Jan 2024 18:09:50 +0100 Subject: [PATCH 30/87] WIP: first draft at introducing a new log 
route --- meilisearch/src/routes/logs.rs | 137 +++++++++++++++++++++++++++++++++ meilisearch/src/routes/mod.rs | 2 + 2 files changed, 139 insertions(+) create mode 100644 meilisearch/src/routes/logs.rs diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs new file mode 100644 index 000000000..3201fc5b7 --- /dev/null +++ b/meilisearch/src/routes/logs.rs @@ -0,0 +1,137 @@ +use std::fmt; +use std::io::Write; +use std::pin::Pin; +use std::str::FromStr; +use std::task::Poll; + +use actix_web::web::{Bytes, Data}; +use actix_web::{web, HttpRequest, HttpResponse}; +use deserr::actix_web::AwebJson; +use deserr::Deserr; +use futures_util::{pin_mut, FutureExt}; +use meilisearch_auth::AuthController; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::ResponseError; +use tokio::pin; +use tokio::sync::mpsc; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::Layer; + +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(SeqHandler(get_logs)))); +} + +#[derive(Debug, Default, Clone, Copy, Deserr)] +#[serde(rename_all = "lowercase")] +pub enum LogLevel { + Error, + Warn, + #[default] + Info, + Debug, + Trace, +} + +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct GetLogs { + #[deserr(default, error = DeserrJsonError)] + pub level: LogLevel, +} + +impl fmt::Display for LogLevel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + LogLevel::Error => f.write_str("error"), + LogLevel::Warn => f.write_str("warn"), + LogLevel::Info => f.write_str("info"), + LogLevel::Debug => f.write_str("debug"), + LogLevel::Trace => f.write_str("trace"), + } + } +} + +struct LogWriter { + sender: mpsc::UnboundedSender>, +} + +impl Write for LogWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?; + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +struct LogStreamer { + receiver: mpsc::UnboundedReceiver>, + // We just need to hold the guard until the struct is dropped + #[allow(unused)] + subscriber: tracing::subscriber::DefaultGuard, +} + +impl futures_util::Stream for LogStreamer { + type Item = Result; + + fn poll_next( + self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + let future = self.get_mut().receiver.recv(); + pin_mut!(future); + + match future.poll_unpin(cx) { + std::task::Poll::Ready(recv) => match recv { + Some(buf) => { + // let bytes = Bytes::copy_from_slice(buf.as_slice()); + Poll::Ready(Some(Ok(buf.into()))) + } + None => Poll::Ready(None), + }, + Poll::Pending => Poll::Pending, + } + } +} + +pub async fn get_logs( + _auth_controller: GuardedData, Data>, + body: AwebJson, + _req: HttpRequest, +) -> Result { + let opt = body.into_inner(); + + // #[cfg(not(feature = "stats_alloc"))] + // let (mut trace, layer) = tracing_trace::Trace::new(file); + // #[cfg(feature = "stats_alloc")] + // let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); + + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + + let subscriber = tracing_subscriber::registry().with( + tracing_subscriber::fmt::layer() + .with_line_number(true) + 
.with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), + ), + ); + // .with( + // layer.with_filter( + // tracing_subscriber::filter::Targets::new() + // .with_target("indexing::", tracing::Level::TRACE), + // ), + // ); + + let subscriber = tracing::subscriber::set_default(subscriber); + + Ok(HttpResponse::Ok().streaming(LogStreamer { receiver, subscriber })) +} diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 0e5623b09..546c5e219 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -22,6 +22,7 @@ mod api_key; mod dump; pub mod features; pub mod indexes; +mod logs; mod metrics; mod multi_search; mod snapshot; @@ -31,6 +32,7 @@ pub mod tasks; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::scope("/tasks").configure(tasks::configure)) .service(web::resource("/health").route(web::get().to(get_health))) + .service(web::scope("/logs").configure(logs::configure)) .service(web::scope("/keys").configure(api_key::configure)) .service(web::scope("/dumps").configure(dump::configure)) .service(web::scope("/snapshots").configure(snapshot::configure)) From 50f84d43f5eebbe13c7a1ab6ca44b4cced74371e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 Jan 2024 13:36:10 +0100 Subject: [PATCH 31/87] init commit --- meilisearch/src/main.rs | 9 +++++++++ meilisearch/src/routes/logs.rs | 18 +++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 75308258e..863db22d0 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -27,6 +27,10 @@ static ALLOC: MiMalloc = MiMalloc; #[global_allocator] static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); +fn f() -> Option>> { + None +} + /// does all the setup before meilisearch is launched fn setup(opt: &Opt) -> anyhow::Result<()> { let now = time::OffsetDateTime::now_utc(); @@ -40,7 +44,12 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[cfg(feature = "stats_alloc")] let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); + // let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(vec![]); + let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(f()); + let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer; + let subscriber = tracing_subscriber::registry() + .with(route_layer) .with( tracing_subscriber::fmt::layer() .with_line_number(true) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 3201fc5b7..a20dac8bb 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -115,15 +115,15 @@ pub async fn get_logs( let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let subscriber = tracing_subscriber::registry().with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_writer(move || LogWriter { sender: sender.clone() }) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .with_filter( - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), - ), - ); + let layer = tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter( + 
tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), + ); + + let subscriber = tracing_subscriber::registry().with(layer); // .with( // layer.with_filter( // tracing_subscriber::filter::Targets::new() From bcf1c4dae53a1c108832406e2df07945f9f40e56 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 29 Jan 2024 17:56:43 +0100 Subject: [PATCH 32/87] make it compile and runtime error --- meilisearch-types/src/error.rs | 2 + meilisearch/src/error.rs | 4 ++ meilisearch/src/lib.rs | 10 +++++ meilisearch/src/main.rs | 17 ++++---- meilisearch/src/routes/logs.rs | 71 ++++++++++++++++++++++++---------- 5 files changed, 75 insertions(+), 29 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 1b54e77c0..965d2e672 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; + +// Experimental features VectorEmbeddingError , InvalidRequest , BAD_REQUEST } diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 3bd8f3edd..ee54cf831 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -12,6 +12,8 @@ pub enum MeilisearchHttpError { #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", "))] MissingContentType(Vec), + #[error("Log route is currently used by someone else.")] + AlreadyUsedLogRoute, #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] CsvDelimiterWithWrongContentType(String), #[error( @@ -59,6 +61,8 @@ impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + /// TODO: TAMO: create a new error code + MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index f1111962c..fb2874472 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -86,10 +86,17 @@ fn is_empty_db(db_path: impl AsRef) -> bool { } } +/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`. 
+pub type LogRouteHandle = + tracing_subscriber::reload::Handle, tracing_subscriber::Registry>; +pub type LogRouteType = + Box + Sync + Send>; + pub fn create_app( index_scheduler: Data, auth_controller: Data, opt: Opt, + logs: LogRouteHandle, analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< @@ -108,6 +115,7 @@ pub fn create_app( index_scheduler.clone(), auth_controller.clone(), &opt, + logs, analytics.clone(), ) }) @@ -391,6 +399,7 @@ pub fn configure_data( index_scheduler: Data, auth: Data, opt: &Opt, + logs: LogRouteHandle, analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; @@ -398,6 +407,7 @@ pub fn configure_data( .app_data(index_scheduler) .app_data(auth) .app_data(web::Data::from(analytics)) + .app_data(web::Data::new(logs)) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 863db22d0..5750e222d 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -12,7 +12,7 @@ use anyhow::Context; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; -use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; +use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, Opt}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; @@ -27,12 +27,12 @@ static ALLOC: MiMalloc = MiMalloc; #[global_allocator] static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); -fn f() -> Option>> { +fn f() -> Option + Send + Sync>> { None } /// does all the setup before meilisearch is launched -fn setup(opt: &Opt) -> anyhow::Result<()> { +fn setup(opt: &Opt) -> anyhow::Result { let now = time::OffsetDateTime::now_utc(); let format = time::format_description::parse("[year]-[month]-[day]_[hour]:[minute]:[second]")?; let trace_file = format!("{}-indexing-trace.json", now.format(&format)?); @@ -44,12 +44,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[cfg(feature = "stats_alloc")] let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); - // let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(vec![]); let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(f()); let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer; let subscriber = tracing_subscriber::registry() - .with(route_layer) + .with(route_layer.boxed()) .with( tracing_subscriber::fmt::layer() .with_line_number(true) @@ -82,7 +81,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { // set the subscriber as the default for the application tracing::subscriber::set_global_default(subscriber).unwrap(); - Ok(()) + Ok(route_layer_handle) } #[actix_web::main] @@ -94,7 +93,7 @@ async fn main() -> anyhow::Result<()> { "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" ); - setup(&opt)?; + let log_handle = setup(&opt)?; match (opt.env.as_ref(), &opt.master_key) { ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => { @@ -132,7 +131,7 @@ async fn main() -> anyhow::Result<()> { print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; Ok(()) } @@ -141,6 +140,7 @@ 
async fn run_http( index_scheduler: Arc, auth_controller: Arc, opt: Opt, + logs: LogRouteHandle, analytics: Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; @@ -153,6 +153,7 @@ async fn run_http( index_scheduler.clone(), auth_controller.clone(), opt.clone(), + logs.clone(), analytics.clone(), enable_dashboard, ) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index a20dac8bb..e5f5ae091 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -13,14 +13,15 @@ use meilisearch_auth::AuthController; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; -use tokio::pin; -use tokio::sync::mpsc; +use tokio::sync::mpsc::{self, UnboundedSender}; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Layer; +use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::{LogRouteHandle, LogRouteType}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(get_logs)))); @@ -73,9 +74,6 @@ impl Write for LogWriter { struct LogStreamer { receiver: mpsc::UnboundedReceiver>, - // We just need to hold the guard until the struct is dropped - #[allow(unused)] - subscriber: tracing::subscriber::DefaultGuard, } impl futures_util::Stream for LogStreamer { @@ -101,8 +99,27 @@ impl futures_util::Stream for LogStreamer { } } +pub fn make_subscriber< + S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, +>( + opt: &GetLogs, + sender: UnboundedSender>, +) -> Box + Send + Sync> { + let fmt_layer = tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), + ); + // let subscriber = tracing_subscriber::registry().with(fmt_layer); + + Box::new(fmt_layer) as Box + Send + Sync> +} + pub async fn get_logs( _auth_controller: GuardedData, Data>, + logs: Data, body: AwebJson, _req: HttpRequest, ) -> Result { @@ -115,23 +132,35 @@ pub async fn get_logs( let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let layer = tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_writer(move || LogWriter { sender: sender.clone() }) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .with_filter( - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), - ); + // let fmt_layer = tracing_subscriber::fmt::layer() + // .with_line_number(true) + // .with_writer(move || LogWriter { sender: sender.clone() }) + // .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + // .with_filter( + // tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), + // ); + // let subscriber = tracing_subscriber::registry().with(fmt_layer); + // let subscriber = Box::new(subscriber) as Box + Send + Sync>; - let subscriber = tracing_subscriber::registry().with(layer); - // .with( - // layer.with_filter( - // tracing_subscriber::filter::Targets::new() - // .with_target("indexing::", tracing::Level::TRACE), - // ), - // ); + let mut was_available = false; - let subscriber = 
tracing::subscriber::set_default(subscriber); + logs.modify(|layer| match layer { + None => { + was_available = true; + // there is already someone getting logs + let subscriber = make_subscriber(&opt, sender); + *layer = Some(subscriber) + } + Some(_) => { + // there is already someone getting logs + was_available = false; + } + }) + .unwrap(); - Ok(HttpResponse::Ok().streaming(LogStreamer { receiver, subscriber })) + if was_available { + Ok(HttpResponse::Ok().streaming(LogStreamer { receiver })) + } else { + Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) + } } From 5e52107474c2ce2550f6610c8064c0d6d258a6bc Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 29 Jan 2024 18:45:55 +0100 Subject: [PATCH 33/87] better than before??? --- meilisearch/src/lib.rs | 9 ++++++--- meilisearch/src/main.rs | 12 +++++++----- meilisearch/src/routes/logs.rs | 18 +++++++++--------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index fb2874472..435a6518b 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -88,9 +88,12 @@ fn is_empty_db(db_path: impl AsRef) -> bool { /// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`. pub type LogRouteHandle = - tracing_subscriber::reload::Handle, tracing_subscriber::Registry>; -pub type LogRouteType = - Box + Sync + Send>; + tracing_subscriber::reload::Handle; +pub type LogRouteType = tracing_subscriber::filter::Filtered< + Option + Send + Sync>>, + tracing_subscriber::filter::LevelFilter, + tracing_subscriber::Registry, +>; pub fn create_app( index_scheduler: Data, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 5750e222d..23e1aab5a 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -16,7 +16,8 @@ use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, LogR use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; -use tracing_subscriber::layer::SubscriberExt as _; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::{Filter, SubscriberExt as _}; use tracing_subscriber::Layer; #[cfg(not(feature = "stats_alloc"))] @@ -27,8 +28,9 @@ static ALLOC: MiMalloc = MiMalloc; #[global_allocator] static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); -fn f() -> Option + Send + Sync>> { - None +fn default_layer( +) -> tracing_subscriber::filter::Filtered + Send + Sync>>, LevelFilter, S> { + None.with_filter(tracing_subscriber::filter::LevelFilter::OFF) } /// does all the setup before meilisearch is launched @@ -44,11 +46,11 @@ fn setup(opt: &Opt) -> anyhow::Result { #[cfg(feature = "stats_alloc")] let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); - let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(f()); + let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer()); let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer; let subscriber = tracing_subscriber::registry() - .with(route_layer.boxed()) + .with(route_layer) .with( tracing_subscriber::fmt::layer() .with_line_number(true) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index e5f5ae091..411eb3663 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -99,7 +99,7 @@ impl 
futures_util::Stream for LogStreamer { } } -pub fn make_subscriber< +pub fn make_layer< S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, >( opt: &GetLogs, @@ -108,10 +108,7 @@ pub fn make_subscriber< let fmt_layer = tracing_subscriber::fmt::layer() .with_line_number(true) .with_writer(move || LogWriter { sender: sender.clone() }) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .with_filter( - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), - ); + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); // let subscriber = tracing_subscriber::registry().with(fmt_layer); Box::new(fmt_layer) as Box + Send + Sync> @@ -144,12 +141,15 @@ pub async fn get_logs( let mut was_available = false; - logs.modify(|layer| match layer { + logs.modify(|layer| match layer.inner_mut() { None => { was_available = true; - // there is already someone getting logs - let subscriber = make_subscriber(&opt, sender); - *layer = Some(subscriber) + *layer.filter_mut() = + tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(); + // there is no one getting logs + let new_layer = make_layer(&opt, sender); + + *layer.inner_mut() = Some(new_layer) } Some(_) => { // there is already someone getting logs From b8da117b9c0cabd4c98bf410b37d7b3b8ed41064 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 30 Jan 2024 12:27:49 +0100 Subject: [PATCH 34/87] Simplify stream implementation --- meilisearch/src/routes/logs.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 411eb3663..2bf4ecce7 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -99,6 +99,16 @@ impl futures_util::Stream for LogStreamer { } } +impl LogStreamer { + pub fn into_stream(self) -> impl futures_util::Stream> { + futures_util::stream::unfold(self, move |mut this| async move { + let vec = this.receiver.recv().await; + + vec.map(From::from).map(Ok).map(|a| (a, this)) + }) + } +} + pub fn make_layer< S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, >( @@ -159,7 +169,7 @@ pub async fn get_logs( .unwrap(); if was_available { - Ok(HttpResponse::Ok().streaming(LogStreamer { receiver })) + Ok(HttpResponse::Ok().streaming(LogStreamer { receiver }.into_stream())) } else { Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) } From 73e66d5a97cdeee3c9c2a5d781e6a006689cbb26 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 30 Jan 2024 12:28:03 +0100 Subject: [PATCH 35/87] Add dummy log when calling tasks --- meilisearch/src/routes/tasks.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 03b63001d..9bb5892b5 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -263,6 +263,7 @@ async fn get_tasks( req: HttpRequest, analytics: web::Data, ) -> Result { + tracing::info!("You called tasks"); let mut params = params.into_inner(); analytics.get_tasks(¶ms, &req); From 3c3a258a22d80c6617dd09adea4b86e3c8fccdf3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 30 Jan 2024 14:19:46 +0100 Subject: [PATCH 36/87] start exposing the profiling layer --- meilisearch/src/routes/logs.rs | 105 +++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 38 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 2bf4ecce7..f8fa5a301 100644 
--- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -1,34 +1,32 @@ use std::fmt; use std::io::Write; -use std::pin::Pin; +use std::ops::ControlFlow; use std::str::FromStr; -use std::task::Poll; use actix_web::web::{Bytes, Data}; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::Deserr; -use futures_util::{pin_mut, FutureExt}; use meilisearch_auth::AuthController; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use tokio::sync::mpsc::{self, UnboundedSender}; -use tracing_subscriber::layer::SubscriberExt; +use tracing::instrument::WithSubscriber; use tracing_subscriber::Layer; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::{LogRouteHandle, LogRouteType}; +use crate::LogRouteHandle; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(get_logs)))); } #[derive(Debug, Default, Clone, Copy, Deserr)] -#[serde(rename_all = "lowercase")] +#[deserr(rename_all = lowercase)] pub enum LogLevel { Error, Warn, @@ -38,11 +36,22 @@ pub enum LogLevel { Trace, } +#[derive(Debug, Default, Clone, Copy, Deserr)] +#[deserr(rename_all = lowercase)] +pub enum LogMode { + #[default] + Fmt, + Profile, +} + #[derive(Debug, Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct GetLogs { #[deserr(default, error = DeserrJsonError)] pub level: LogLevel, + + #[deserr(default, error = DeserrJsonError)] + pub mode: LogMode, } impl fmt::Display for LogLevel { @@ -76,29 +85,6 @@ struct LogStreamer { receiver: mpsc::UnboundedReceiver>, } -impl futures_util::Stream for LogStreamer { - type Item = Result; - - fn poll_next( - self: Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - ) -> Poll> { - let future = self.get_mut().receiver.recv(); - pin_mut!(future); - - match future.poll_unpin(cx) { - std::task::Poll::Ready(recv) => match recv { - Some(buf) => { - // let bytes = Bytes::copy_from_slice(buf.as_slice()); - Poll::Ready(Some(Ok(buf.into()))) - } - None => Poll::Ready(None), - }, - Poll::Pending => Poll::Pending, - } - } -} - impl LogStreamer { pub fn into_stream(self) -> impl futures_util::Stream> { futures_util::stream::unfold(self, move |mut this| async move { @@ -115,13 +101,38 @@ pub fn make_layer< opt: &GetLogs, sender: UnboundedSender>, ) -> Box + Send + Sync> { - let fmt_layer = tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_writer(move || LogWriter { sender: sender.clone() }) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); - // let subscriber = tracing_subscriber::registry().with(fmt_layer); + match opt.mode { + LogMode::Fmt => { + let fmt_layer = tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); - Box::new(fmt_layer) as Box + Send + Sync> + Box::new(fmt_layer) as Box + Send + Sync> + } + LogMode::Profile => { + let (mut trace, layer) = + tracing_trace::Trace::new(LogWriter { sender: sender.clone() }); + + tokio::task::spawn(async move { + loop { + match tokio::time::timeout(std::time::Duration::from_secs(1), trace.receive()) + .await + { + Ok(Ok(ControlFlow::Continue(()))) => continue, + 
Ok(Ok(ControlFlow::Break(_))) => break, + // the other half of the channel was dropped + Ok(Err(_)) => break, + Err(_) => trace.flush().unwrap(), + } + } + while trace.try_receive().is_ok() {} + trace.flush().unwrap(); + }); + + Box::new(layer) as Box + Send + Sync> + } + } } pub async fn get_logs( @@ -153,10 +164,28 @@ pub async fn get_logs( logs.modify(|layer| match layer.inner_mut() { None => { - was_available = true; - *layer.filter_mut() = - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(); // there is no one getting logs + was_available = true; + match opt.mode { + LogMode::Fmt => { + *layer.filter_mut() = + tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()) + .unwrap(); + } + LogMode::Profile => { + *layer.filter_mut() = + tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()) + .unwrap(); + // *layer.filter_mut() = tracing_subscriber::filter::Targets::new() + // .with_target("indexing::", tracing::Level::TRACE) + // .with_filter( + // tracing_subscriber::filter::LevelFilter::from_str( + // &opt.level.to_string(), + // ) + // .unwrap(), + // ) + } + } let new_layer = make_layer(&opt, sender); *layer.inner_mut() = Some(new_layer) From abaa72e2bf65d26f29a9b2678bed8f19d05687c7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 30 Jan 2024 16:31:42 +0100 Subject: [PATCH 37/87] start handling reloads with profiling --- meilisearch/src/lib.rs | 4 ++- meilisearch/src/main.rs | 9 ++--- meilisearch/src/routes/logs.rs | 62 +++++++++++++--------------------- 3 files changed, 32 insertions(+), 43 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 435a6518b..2ebed39a3 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -39,6 +39,7 @@ use meilisearch_types::versioning::{check_version_file, create_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; +use tracing_subscriber::filter::Targets; use crate::error::MeilisearchHttpError; @@ -89,9 +90,10 @@ fn is_empty_db(db_path: impl AsRef) -> bool { /// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`. 
pub type LogRouteHandle = tracing_subscriber::reload::Handle; + pub type LogRouteType = tracing_subscriber::filter::Filtered< Option + Send + Sync>>, - tracing_subscriber::filter::LevelFilter, + Targets, tracing_subscriber::Registry, >; diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 23e1aab5a..097a4d9bb 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -12,7 +12,9 @@ use anyhow::Context; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; -use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, Opt}; +use meilisearch::{ + analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt, +}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; @@ -28,9 +30,8 @@ static ALLOC: MiMalloc = MiMalloc; #[global_allocator] static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); -fn default_layer( -) -> tracing_subscriber::filter::Filtered + Send + Sync>>, LevelFilter, S> { - None.with_filter(tracing_subscriber::filter::LevelFilter::OFF) +fn default_layer() -> LogRouteType { + None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF)) } /// does all the setup before meilisearch is launched diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index f8fa5a301..b40a34b91 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -2,6 +2,7 @@ use std::fmt; use std::io::Write; use std::ops::ControlFlow; use std::str::FromStr; +use std::sync::Arc; use actix_web::web::{Bytes, Data}; use actix_web::{web, HttpRequest, HttpResponse}; @@ -13,6 +14,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use tokio::sync::mpsc::{self, UnboundedSender}; use tracing::instrument::WithSubscriber; +use tracing_subscriber::filter::Targets; use tracing_subscriber::Layer; use crate::error::MeilisearchHttpError; @@ -48,7 +50,7 @@ pub enum LogMode { #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct GetLogs { #[deserr(default, error = DeserrJsonError)] - pub level: LogLevel, + pub target: String, #[deserr(default, error = DeserrJsonError)] pub mode: LogMode, @@ -70,6 +72,12 @@ struct LogWriter { sender: mpsc::UnboundedSender>, } +impl Drop for LogWriter { + fn drop(&mut self) { + println!("hello"); + } +} + impl Write for LogWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?; @@ -83,6 +91,17 @@ impl Write for LogWriter { struct LogStreamer { receiver: mpsc::UnboundedReceiver>, + /// We need to keep an handle on the logs to make it available again when the streamer is dropped + logs: Arc, +} + +impl Drop for LogStreamer { + fn drop(&mut self) { + println!("log streamer being dropped"); + if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + } } impl LogStreamer { @@ -142,50 +161,16 @@ pub async fn get_logs( _req: HttpRequest, ) -> Result { let opt = body.into_inner(); - - // #[cfg(not(feature = "stats_alloc"))] - // let (mut trace, layer) = tracing_trace::Trace::new(file); - // #[cfg(feature = "stats_alloc")] - // let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); - let (sender, 
receiver) = tokio::sync::mpsc::unbounded_channel(); - // let fmt_layer = tracing_subscriber::fmt::layer() - // .with_line_number(true) - // .with_writer(move || LogWriter { sender: sender.clone() }) - // .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - // .with_filter( - // tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()).unwrap(), - // ); - // let subscriber = tracing_subscriber::registry().with(fmt_layer); - // let subscriber = Box::new(subscriber) as Box + Send + Sync>; - let mut was_available = false; logs.modify(|layer| match layer.inner_mut() { None => { // there is no one getting logs was_available = true; - match opt.mode { - LogMode::Fmt => { - *layer.filter_mut() = - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()) - .unwrap(); - } - LogMode::Profile => { - *layer.filter_mut() = - tracing_subscriber::filter::LevelFilter::from_str(&opt.level.to_string()) - .unwrap(); - // *layer.filter_mut() = tracing_subscriber::filter::Targets::new() - // .with_target("indexing::", tracing::Level::TRACE) - // .with_filter( - // tracing_subscriber::filter::LevelFilter::from_str( - // &opt.level.to_string(), - // ) - // .unwrap(), - // ) - } - } + *layer.filter_mut() = + tracing_subscriber::filter::Targets::from_str(&opt.target).unwrap(); let new_layer = make_layer(&opt, sender); *layer.inner_mut() = Some(new_layer) @@ -198,7 +183,8 @@ pub async fn get_logs( .unwrap(); if was_available { - Ok(HttpResponse::Ok().streaming(LogStreamer { receiver }.into_stream())) + Ok(HttpResponse::Ok() + .streaming(LogStreamer { receiver, logs: logs.into_inner() }.into_stream())) } else { Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) } From 91a8f7476330da2496cd2eca64175ccee491c34a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 30 Jan 2024 18:15:53 +0100 Subject: [PATCH 38/87] Add cancel log route --- meilisearch/src/routes/logs.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index b40a34b91..ca1ca2d2f 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -13,8 +13,6 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use tokio::sync::mpsc::{self, UnboundedSender}; -use tracing::instrument::WithSubscriber; -use tracing_subscriber::filter::Targets; use tracing_subscriber::Layer; use crate::error::MeilisearchHttpError; @@ -24,7 +22,11 @@ use crate::extractors::sequential_extractor::SeqHandler; use crate::LogRouteHandle; pub fn configure(cfg: &mut web::ServiceConfig) { - cfg.service(web::resource("").route(web::post().to(SeqHandler(get_logs)))); + cfg.service( + web::resource("") + .route(web::post().to(SeqHandler(get_logs))) + .route(web::delete().to(SeqHandler(cancel_logs))), + ); } #[derive(Debug, Default, Clone, Copy, Deserr)] @@ -189,3 +191,15 @@ pub async fn get_logs( Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) } } + +pub async fn cancel_logs( + _auth_controller: GuardedData, Data>, + logs: Data, + _req: HttpRequest, +) -> Result { + if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + + Ok(HttpResponse::NoContent().finish()) +} From ce6e6ec2c59d69dbe31e4f8dc05bafa4e2ce6fac Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 31 Jan 2024 17:21:27 +0100 Subject: [PATCH 39/87] stops profiling in a file by default --- 
meilisearch/src/main.rs | 48 +++++++---------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 097a4d9bb..cd9fe22f8 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -36,50 +36,18 @@ fn default_layer() -> LogRouteType { /// does all the setup before meilisearch is launched fn setup(opt: &Opt) -> anyhow::Result { - let now = time::OffsetDateTime::now_utc(); - let format = time::format_description::parse("[year]-[month]-[day]_[hour]:[minute]:[second]")?; - let trace_file = format!("{}-indexing-trace.json", now.format(&format)?); - - let file = std::fs::File::create(&trace_file) - .with_context(|| format!("could not create trace file at '{}'", trace_file))?; - #[cfg(not(feature = "stats_alloc"))] - let (mut trace, layer) = tracing_trace::Trace::new(file); - #[cfg(feature = "stats_alloc")] - let (mut trace, layer) = tracing_trace::Trace::with_stats_alloc(file, &ALLOC); - let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer()); let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer; - let subscriber = tracing_subscriber::registry() - .with(route_layer) - .with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .with_filter( - tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) - .unwrap(), - ), - ) - .with( - layer.with_filter( - tracing_subscriber::filter::Targets::new() - .with_target("indexing::", tracing::Level::TRACE), + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_line_number(true) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) + .unwrap(), ), - ); - - tokio::task::spawn(async move { - loop { - match tokio::time::timeout(std::time::Duration::from_secs(1), trace.receive()).await { - Ok(Ok(ControlFlow::Continue(()))) => continue, - Ok(Ok(ControlFlow::Break(_))) => break, - Ok(Err(_)) => todo!(), - Err(_) => trace.flush().unwrap(), - } - } - while trace.try_receive().is_ok() {} - trace.flush().unwrap(); - }); + ); // set the subscriber as the default for the application tracing::subscriber::set_global_default(subscriber).unwrap(); From 77254765e834ad6f536c5ef474f700458bc3278a Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 31 Jan 2024 17:46:36 +0100 Subject: [PATCH 40/87] get rids of env loggegr and fix the tests --- Cargo.lock | 1 - meilisearch/Cargo.toml | 1 - meilisearch/tests/common/server.rs | 8 ++++++++ meilisearch/tests/common/service.rs | 8 ++++++++ meilisearch/tests/tasks/webhook.rs | 4 ---- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 866912cc8..1f15047cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3620,7 +3620,6 @@ dependencies = [ "deserr", "dump", "either", - "env_logger", "file-store", "flate2", "fst", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 8ed35d542..21136b6e8 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11" deserr = { version = "0.6.1", features = ["actix-web"] } dump = { path = "../dump" } either = "1.9.0" -env_logger = "0.10.1" file-store = { path = "../file-store" } flate2 = "1.0.28" fst = "0.4.7" diff --git a/meilisearch/tests/common/server.rs 
b/meilisearch/tests/common/server.rs index 27feb187f..134124cc8 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch}; use once_cell::sync::Lazy; use tempfile::TempDir; use tokio::time::sleep; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use super::index::Index; use super::service::Service; @@ -81,10 +83,16 @@ impl Server { Response = ServiceResponse, Error = actix_web::Error, > { + let (_route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + actix_web::test::init_service(create_app( self.service.index_scheduler.clone().into(), self.service.auth.clone().into(), self.service.options.clone(), + route_layer_handle, analytics::MockAnalytics::new(&self.service.options), true, )) diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 078ddb6e5..4c23a18d8 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -7,6 +7,8 @@ use actix_web::test::TestRequest; use index_scheduler::IndexScheduler; use meilisearch::{analytics, create_app, Opt}; use meilisearch_auth::AuthController; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use crate::common::encoder::Encoder; use crate::common::Value; @@ -105,10 +107,16 @@ impl Service { } pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let (_route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + let app = test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), self.options.clone(), + route_layer_handle, analytics::MockAnalytics::new(&self.options), true, )) diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs index 6979ff294..a18a93edb 100644 --- a/meilisearch/tests/tasks/webhook.rs +++ b/meilisearch/tests/tasks/webhook.rs @@ -44,10 +44,6 @@ struct WebhookHandle { } async fn create_webhook_server() -> WebhookHandle { - let mut log_builder = env_logger::Builder::new(); - log_builder.parse_filters("info"); - log_builder.init(); - let (sender, receiver) = mpsc::unbounded_channel(); let sender = Arc::new(sender); From 0f327f2821309d29c3a8c2b78a9c5a7276f24246 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 31 Jan 2024 17:44:28 +0100 Subject: [PATCH 41/87] tracing-trace: implement Error on Error --- tracing-trace/src/error.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tracing-trace/src/error.rs b/tracing-trace/src/error.rs index cce13f85c..831da1e9d 100644 --- a/tracing-trace/src/error.rs +++ b/tracing-trace/src/error.rs @@ -3,6 +3,8 @@ pub enum Error { Json(serde_json::Error), } +impl std::error::Error for Error {} + impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("error de/serializing trace entry:")?; From 0e7a411d4d8e081c6e9b6e4e3e4b28f1f2f811fb Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 31 Jan 2024 17:44:54 +0100 Subject: [PATCH 42/87] tracing-trace: introduce TraceWriter, trace now only exposes the channel --- tracing-trace/src/layer.rs | 26 ++++++++++++++++++++------ tracing-trace/src/lib.rs | 16 +++++++++++++++- tracing-trace/src/main.rs | 2 +- 3 files changed, 36 
insertions(+), 8 deletions(-) diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index 1b9aadfa7..aa2908304 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -15,7 +15,7 @@ use crate::entry::{ Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, SpanExit, SpanId, }; -use crate::{Error, Trace}; +use crate::{Error, Trace, TraceWriter}; /// Layer that measures the time spent in spans. pub struct TraceLayer { @@ -25,10 +25,10 @@ pub struct TraceLayer { memory_allocator: Option<&'static StatsAlloc>, } -impl Trace { - pub fn new(writer: W) -> (Self, TraceLayer) { +impl Trace { + pub fn new() -> (Self, TraceLayer) { let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let trace = Trace { writer, receiver }; + let trace = Trace { receiver }; let layer = TraceLayer { sender, callsites: Default::default(), @@ -39,11 +39,10 @@ impl Trace { } pub fn with_stats_alloc( - writer: W, stats_alloc: &'static StatsAlloc, ) -> (Self, TraceLayer) { let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let trace = Trace { writer, receiver }; + let trace = Trace { receiver }; let layer = TraceLayer { sender, callsites: Default::default(), @@ -52,6 +51,21 @@ impl Trace { }; (trace, layer) } +} + +impl TraceWriter { + pub fn new(writer: W) -> (Self, TraceLayer) { + let (trace, layer) = Trace::new(); + (trace.into_writer(writer), layer) + } + + pub fn with_stats_alloc( + writer: W, + stats_alloc: &'static StatsAlloc, + ) -> (Self, TraceLayer) { + let (trace, layer) = Trace::with_stats_alloc(stats_alloc); + (trace.into_writer(writer), layer) + } pub async fn receive(&mut self) -> Result, Error> { let Some(entry) = self.receiver.recv().await else { diff --git a/tracing-trace/src/lib.rs b/tracing-trace/src/lib.rs index 3d00eef10..77eb61d47 100644 --- a/tracing-trace/src/lib.rs +++ b/tracing-trace/src/lib.rs @@ -9,11 +9,25 @@ pub mod processor; pub use error::Error; -pub struct Trace { +pub struct TraceWriter { writer: W, receiver: tokio::sync::mpsc::UnboundedReceiver, } +pub struct Trace { + receiver: tokio::sync::mpsc::UnboundedReceiver, +} + +impl Trace { + pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver { + self.receiver + } + + pub fn into_writer(self, writer: W) -> TraceWriter { + TraceWriter { writer, receiver: self.receiver } + } +} + pub struct TraceReader { reader: R, } diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs index f9f665861..c74bf1fb7 100644 --- a/tracing-trace/src/main.rs +++ b/tracing-trace/src/main.rs @@ -74,7 +74,7 @@ fn on_panic(info: &std::panic::PanicInfo) { fn main() { let (mut trace, profiling_layer) = - tracing_trace::Trace::new(std::fs::File::create("trace.json").unwrap()); + tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap()); let subscriber = tracing_subscriber::registry() // any number of other subscriber layers may be added before or From afc0585c1c1179c9293aad1b4d4907a7842619b2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 31 Jan 2024 17:46:57 +0100 Subject: [PATCH 43/87] meilisearch: don't spawn a report everytime Meilisearch starts --- meilisearch/src/main.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index cd9fe22f8..94a28eb32 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,6 +1,5 @@ use std::env; use std::io::{stderr, Write}; -use std::ops::ControlFlow; use std::path::PathBuf; use std::str::FromStr; 
use std::sync::Arc; @@ -8,7 +7,6 @@ use std::sync::Arc; use actix_web::http::KeepAlive; use actix_web::web::Data; use actix_web::HttpServer; -use anyhow::Context; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; @@ -19,7 +17,7 @@ use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE} use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use tracing::level_filters::LevelFilter; -use tracing_subscriber::layer::{Filter, SubscriberExt as _}; +use tracing_subscriber::layer::SubscriberExt as _; use tracing_subscriber::Layer; #[cfg(not(feature = "stats_alloc"))] From 38e1c40f38174876221fa1c5ccc9c1dc7798785e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 31 Jan 2024 17:47:30 +0100 Subject: [PATCH 44/87] meilisearch: logs route disconnects in profile mode --- meilisearch/src/routes/logs.rs | 128 +++++++++++++++++++++------------ 1 file changed, 84 insertions(+), 44 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index ca1ca2d2f..b327acab1 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -1,6 +1,6 @@ use std::fmt; use std::io::Write; -use std::ops::ControlFlow; +use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; @@ -8,11 +8,12 @@ use actix_web::web::{Bytes, Data}; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::Deserr; +use futures_util::Stream; use meilisearch_auth::AuthController; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; -use meilisearch_types::error::ResponseError; -use tokio::sync::mpsc::{self, UnboundedSender}; +use meilisearch_types::error::{Code, ResponseError}; +use tokio::sync::mpsc::{self}; use tracing_subscriber::Layer; use crate::error::MeilisearchHttpError; @@ -91,13 +92,12 @@ impl Write for LogWriter { } } -struct LogStreamer { - receiver: mpsc::UnboundedReceiver>, +struct HandleGuard { /// We need to keep an handle on the logs to make it available again when the streamer is dropped logs: Arc, } -impl Drop for LogStreamer { +impl Drop for HandleGuard { fn drop(&mut self) { println!("log streamer being dropped"); if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) { @@ -106,56 +106,99 @@ impl Drop for LogStreamer { } } -impl LogStreamer { - pub fn into_stream(self) -> impl futures_util::Stream> { - futures_util::stream::unfold(self, move |mut this| async move { - let vec = this.receiver.recv().await; +fn byte_stream( + receiver: mpsc::UnboundedReceiver>, + guard: HandleGuard, +) -> impl futures_util::Stream> { + futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move { + let vec = receiver.recv().await; - vec.map(From::from).map(Ok).map(|a| (a, this)) - }) - } + vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard))) + }) } -pub fn make_layer< +type PinnedByteStream = Pin>>>; + +fn make_layer< S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, >( opt: &GetLogs, - sender: UnboundedSender>, -) -> Box + Send + Sync> { + logs: Data, +) -> (Box + Send + Sync>, PinnedByteStream) { + let guard = HandleGuard { logs: logs.into_inner() }; match opt.mode { LogMode::Fmt => { + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + let fmt_layer = tracing_subscriber::fmt::layer() .with_line_number(true) .with_writer(move || LogWriter { sender: sender.clone() }) 
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); - Box::new(fmt_layer) as Box + Send + Sync> + let stream = byte_stream(receiver, guard); + (Box::new(fmt_layer) as Box + Send + Sync>, Box::pin(stream)) } LogMode::Profile => { - let (mut trace, layer) = - tracing_trace::Trace::new(LogWriter { sender: sender.clone() }); + let (trace, layer) = tracing_trace::Trace::new(); - tokio::task::spawn(async move { - loop { - match tokio::time::timeout(std::time::Duration::from_secs(1), trace.receive()) - .await - { - Ok(Ok(ControlFlow::Continue(()))) => continue, - Ok(Ok(ControlFlow::Break(_))) => break, - // the other half of the channel was dropped - Ok(Err(_)) => break, - Err(_) => trace.flush().unwrap(), - } - } - while trace.try_receive().is_ok() {} - trace.flush().unwrap(); - }); + let stream = entry_stream(trace, guard); - Box::new(layer) as Box + Send + Sync> + (Box::new(layer) as Box + Send + Sync>, Box::pin(stream)) } } } +fn entry_stream( + trace: tracing_trace::Trace, + guard: HandleGuard, +) -> impl Stream> { + let receiver = trace.into_receiver(); + let entry_buf = Vec::new(); + + futures_util::stream::unfold( + (receiver, entry_buf, guard), + move |(mut receiver, mut entry_buf, guard)| async move { + let mut bytes = Vec::new(); + + while bytes.len() < 8192 { + entry_buf.clear(); + + let Ok(count) = tokio::time::timeout( + std::time::Duration::from_secs(1), + receiver.recv_many(&mut entry_buf, 100), + ) + .await + else { + break; + }; + + if count == 0 { + // channel closed, exit + return None; + } + + for entry in &entry_buf { + if let Err(error) = serde_json::to_writer(&mut bytes, entry) { + tracing::error!( + error = &error as &dyn std::error::Error, + "deserializing entry" + ); + return Some(( + Err(ResponseError::from_msg( + format!("error deserializing entry: {error}"), + Code::Internal, + )), + (receiver, entry_buf, guard), + )); + } + } + } + + Some((Ok(bytes.into()), (receiver, entry_buf, guard))) + }, + ) +} + pub async fn get_logs( _auth_controller: GuardedData, Data>, logs: Data, @@ -163,30 +206,27 @@ pub async fn get_logs( _req: HttpRequest, ) -> Result { let opt = body.into_inner(); - let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let mut was_available = false; + let mut stream = None; logs.modify(|layer| match layer.inner_mut() { None => { // there is no one getting logs - was_available = true; *layer.filter_mut() = tracing_subscriber::filter::Targets::from_str(&opt.target).unwrap(); - let new_layer = make_layer(&opt, sender); + let (new_layer, new_stream) = make_layer(&opt, logs.clone()); - *layer.inner_mut() = Some(new_layer) + *layer.inner_mut() = Some(new_layer); + stream = Some(new_stream); } Some(_) => { // there is already someone getting logs - was_available = false; } }) .unwrap(); - if was_available { - Ok(HttpResponse::Ok() - .streaming(LogStreamer { receiver, logs: logs.into_inner() }.into_stream())) + if let Some(stream) = stream { + Ok(HttpResponse::Ok().streaming(stream)) } else { Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) } From bf5cea8b100fba670e827d422f3167728d48bf14 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 1 Feb 2024 18:02:02 +0100 Subject: [PATCH 45/87] add a test --- meilisearch/tests/integration.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch/tests/integration.rs b/meilisearch/tests/integration.rs index b6992791a..943af802a 100644 --- a/meilisearch/tests/integration.rs +++ b/meilisearch/tests/integration.rs @@ -5,6 +5,7 @@ mod documents; mod dumps; mod features; mod index; +mod 
logs; mod search; mod settings; mod snapshot; From 80774148fd908bd45662beb762eec48412593f26 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 5 Feb 2024 11:47:56 +0100 Subject: [PATCH 46/87] handle and tests errors --- meilisearch/src/error.rs | 1 - meilisearch/src/routes/logs.rs | 90 ++++++++++++++++-------------- meilisearch/tests/logs/error.rs | 98 +++++++++++++++++++++++++++++++++ meilisearch/tests/logs/mod.rs | 89 ++++++++++++++++++++++++++++++ 4 files changed, 237 insertions(+), 41 deletions(-) create mode 100644 meilisearch/tests/logs/error.rs create mode 100644 meilisearch/tests/logs/mod.rs diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index ee54cf831..6c5f76a72 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -61,7 +61,6 @@ impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, - /// TODO: TAMO: create a new error code MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index b327acab1..96228f9aa 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -1,19 +1,21 @@ -use std::fmt; +use std::convert::Infallible; use std::io::Write; +use std::ops::ControlFlow; use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; use actix_web::web::{Bytes, Data}; -use actix_web::{web, HttpRequest, HttpResponse}; +use actix_web::{web, HttpResponse}; use deserr::actix_web::AwebJson; -use deserr::Deserr; +use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use futures_util::Stream; use meilisearch_auth::AuthController; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; use tokio::sync::mpsc::{self}; +use tracing_subscriber::filter::Targets; use tracing_subscriber::Layer; use crate::error::MeilisearchHttpError; @@ -30,17 +32,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -#[derive(Debug, Default, Clone, Copy, Deserr)] -#[deserr(rename_all = lowercase)] -pub enum LogLevel { - Error, - Warn, - #[default] - Info, - Debug, - Trace, -} - #[derive(Debug, Default, Clone, Copy, Deserr)] #[deserr(rename_all = lowercase)] pub enum LogMode { @@ -49,36 +40,59 @@ pub enum LogMode { Profile, } -#[derive(Debug, Deserr)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] -pub struct GetLogs { - #[deserr(default, error = DeserrJsonError)] - pub target: String, +/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Clone, Debug)] +struct MyTargets(Targets); - #[deserr(default, error = DeserrJsonError)] - pub mode: LogMode, +/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Debug, thiserror::Error)] +enum MyParseError { + #[error(transparent)] + ParseError(#[from] tracing_subscriber::filter::ParseError), + #[error( + "Empty string is not a valid target. If you want to get no logs use `OFF`. 
Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`" + )] + Example, } -impl fmt::Display for LogLevel { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - LogLevel::Error => f.write_str("error"), - LogLevel::Warn => f.write_str("warn"), - LogLevel::Info => f.write_str("info"), - LogLevel::Debug => f.write_str("debug"), - LogLevel::Trace => f.write_str("trace"), +impl FromStr for MyTargets { + type Err = MyParseError; + + fn from_str(s: &str) -> Result { + if s.is_empty() { + Err(MyParseError::Example) + } else { + Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?)) } } } -struct LogWriter { - sender: mpsc::UnboundedSender>, +impl MergeWithError for DeserrJsonError { + fn merge( + _self_: Option, + other: MyParseError, + merge_location: ValuePointerRef, + ) -> ControlFlow { + Self::error::( + None, + ErrorKind::Unexpected { msg: other.to_string() }, + merge_location, + ) + } } -impl Drop for LogWriter { - fn drop(&mut self) { - println!("hello"); - } +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct GetLogs { + #[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] + target: MyTargets, + + #[deserr(default, error = DeserrJsonError)] + mode: LogMode, +} + +struct LogWriter { + sender: mpsc::UnboundedSender>, } impl Write for LogWriter { @@ -99,7 +113,6 @@ struct HandleGuard { impl Drop for HandleGuard { fn drop(&mut self) { - println!("log streamer being dropped"); if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) { tracing::error!("Could not free the logs route: {e}"); } @@ -203,7 +216,6 @@ pub async fn get_logs( _auth_controller: GuardedData, Data>, logs: Data, body: AwebJson, - _req: HttpRequest, ) -> Result { let opt = body.into_inner(); @@ -212,8 +224,7 @@ pub async fn get_logs( logs.modify(|layer| match layer.inner_mut() { None => { // there is no one getting logs - *layer.filter_mut() = - tracing_subscriber::filter::Targets::from_str(&opt.target).unwrap(); + *layer.filter_mut() = opt.target.0.clone(); let (new_layer, new_stream) = make_layer(&opt, logs.clone()); *layer.inner_mut() = Some(new_layer); @@ -235,7 +246,6 @@ pub async fn get_logs( pub async fn cancel_logs( _auth_controller: GuardedData, Data>, logs: Data, - _req: HttpRequest, ) -> Result { if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) { tracing::error!("Could not free the logs route: {e}"); diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs new file mode 100644 index 000000000..965b68d17 --- /dev/null +++ b/meilisearch/tests/logs/error.rs @@ -0,0 +1,98 @@ +use meili_snap::*; + +use crate::common::Server; +use crate::json; + +#[actix_rt::test] +async fn logs_bad_target() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs", json!({ "target": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs", json!({ "target": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a 
string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Our help message + let (response, code) = server.service.post("/logs", json!({ "target": "" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // An error from the target parser + let (response, code) = server.service.post("/logs", json!({ "target": "==" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_bad_mode() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs", json!({ "mode": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs", json!({ "mode": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong value + let (response, code) = server.service.post("/logs", json!({ "mode": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Unknown value `tamo` at `.mode`: expected one of `fmt`, `profile`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs new file mode 100644 index 000000000..4aba0a4cd --- /dev/null +++ b/meilisearch/tests/logs/mod.rs @@ -0,0 +1,89 @@ +mod error; + +use std::rc::Rc; +use std::str::FromStr; + +use actix_web::http::header::ContentType; +use meili_snap::snapshot; +use meilisearch::{analytics, create_app, Opt}; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::Layer; + +use crate::common::{default_settings, Server}; +use crate::json; + +#[actix_web::test] +async fn basic_test_log_route() { + let db_path = tempfile::tempdir().unwrap(); + let server = + Server::new_with_options(Opt { ..default_settings(db_path.path()) }).await.unwrap(); + + let (route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_line_number(true) + 
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()), + ); + + let app = actix_web::test::init_service(create_app( + server.service.index_scheduler.clone().into(), + server.service.auth.clone().into(), + server.service.options.clone(), + route_layer_handle, + analytics::MockAnalytics::new(&server.service.options), + true, + )) + .await; + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + let app = Rc::new(app); + + // First, we start listening on the `/logs` route + let handle_app = app.clone(); + let handle = tokio::task::spawn_local(async move { + let req = actix_web::test::TestRequest::post() + .uri("/logs") + .insert_header(ContentType::json()) + .set_payload( + serde_json::to_vec(&json!({ + "mode": "fmt", + "target": "info", + })) + .unwrap(), + ); + let req = req.to_request(); + let ret = actix_web::test::call_service(&*handle_app, req).await; + actix_web::test::read_body(ret).await + }); + + // We're going to create an index to get at least one info log saying we processed a batch of task + let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await; + snapshot!(ret, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + server.wait_task(ret.uid()).await; + + let req = actix_web::test::TestRequest::delete().uri("/logs"); + let req = req.to_request(); + let ret = actix_web::test::call_service(&*app, req).await; + let code = ret.status(); + snapshot!(code, @"204 No Content"); + + let logs = handle.await.unwrap(); + let logs = String::from_utf8(logs.to_vec()).unwrap(); + assert!(logs.contains("INFO"), "{logs}"); +} From 7793ba67a40fdb3a17c1e71a8b3799597620827d Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 5 Feb 2024 13:29:01 +0100 Subject: [PATCH 47/87] hide the route logs behind a feature flag --- index-scheduler/src/features.rs | 14 ++++++++++++++ meilisearch-types/src/features.rs | 1 + meilisearch/src/analytics/segment_analytics.rs | 3 +++ meilisearch/src/option.rs | 14 ++++++++++++++ meilisearch/src/routes/features.rs | 5 +++++ meilisearch/src/routes/logs.rs | 11 +++++++---- 6 files changed, 44 insertions(+), 4 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c18ab98db..744b47f11 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -43,6 +43,20 @@ impl RoFeatures { } } + pub fn check_logs_route(&self) -> Result<()> { + if self.runtime.logs_route { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "Getting logs", + feature: "logsRoute", + /// Update the discussion link + issue_link: "https://github.com/meilisearch/product/discussions/625", + } + .into()) + } + } + pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> { if self.runtime.vector_store { Ok(()) diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index d737c618e..f7e09317b 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; pub struct RuntimeTogglableFeatures { pub vector_store: bool, pub metrics: bool, + pub logs_route: bool, pub export_puffin_reports: bool, } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 6969c9b3d..a38ddaab2 100644 --- 
a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -250,6 +250,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, db_path: bool, @@ -287,6 +288,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, http_addr, @@ -333,6 +335,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 3cff14aa0..d612210c8 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,6 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; @@ -309,6 +310,14 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + // TODO: update the link + /// Experimental logs route feature. For more information, see: + /// + /// Enables the log route on the `POST /logs` endpoint and the `DELETE /logs` to stop receiving logs. 
+ #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] + #[serde(default)] + pub experimental_enable_logs_route: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] @@ -414,6 +423,7 @@ impl Opt { #[cfg(feature = "analytics")] no_analytics, experimental_enable_metrics, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -470,6 +480,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, + experimental_enable_logs_route.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, experimental_reduce_indexing_memory_usage.to_string(), diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 0a7e73ac6..375201a97 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -45,6 +45,8 @@ pub struct RuntimeTogglableFeatures { #[deserr(default)] pub metrics: Option, #[deserr(default)] + pub logs_route: Option, + #[deserr(default)] pub export_puffin_reports: Option, } @@ -63,6 +65,7 @@ async fn patch_features( let new_features = meilisearch_types::features::RuntimeTogglableFeatures { vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), + logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route), export_puffin_reports: new_features .0 .export_puffin_reports @@ -75,6 +78,7 @@ async fn patch_features( let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, + logs_route, export_puffin_reports, } = new_features; @@ -83,6 +87,7 @@ async fn patch_features( json!({ "vector_store": vector_store, "metrics": metrics, + "logs_route": logs_route, "export_puffin_reports": export_puffin_reports, }), Some(&req), diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 96228f9aa..bca944d76 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -10,7 +10,7 @@ use actix_web::{web, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use futures_util::Stream; -use meilisearch_auth::AuthController; +use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; @@ -213,12 +213,13 @@ fn entry_stream( } pub async fn get_logs( - _auth_controller: GuardedData, Data>, + index_scheduler: GuardedData, Data>, logs: Data, body: AwebJson, ) -> Result { - let opt = body.into_inner(); + index_scheduler.features().check_logs_route()?; + let opt = body.into_inner(); let mut stream = None; logs.modify(|layer| match layer.inner_mut() { @@ -244,9 +245,11 @@ pub async fn get_logs( } pub async fn cancel_logs( - _auth_controller: GuardedData, Data>, + index_scheduler: GuardedData, Data>, logs: Data, ) -> Result { + index_scheduler.features().check_logs_route()?; + if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) { tracing::error!("Could not free the logs route: {e}"); } From e23ec4886d808a482124adffac07c13059d60487 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 5 Feb 2024 14:05:24 +0100 Subject: [PATCH 48/87] fix the 
tests and add tests on the experimental features --- index-scheduler/src/features.rs | 5 +++-- meilisearch-types/src/features.rs | 1 + meilisearch/src/option.rs | 5 ++++- meilisearch/tests/dumps/mod.rs | 1 + meilisearch/tests/features/mod.rs | 8 +++++++- meilisearch/tests/logs/error.rs | 27 +++++++++++++++++++++++++++ meilisearch/tests/logs/mod.rs | 8 ++++++-- meilisearch/tests/search/hybrid.rs | 1 + 8 files changed, 50 insertions(+), 6 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 744b47f11..9eaa658d9 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -48,8 +48,8 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "Getting logs", - feature: "logsRoute", + disabled_action: "getting logs through the `/logs` route", + feature: "logs route", /// Update the discussion link issue_link: "https://github.com/meilisearch/product/discussions/625", } @@ -95,6 +95,7 @@ impl FeatureData { runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { metrics: instance_features.metrics || persisted_features.metrics, + logs_route: instance_features.logs_route || persisted_features.logs_route, ..persisted_features })); diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index f7e09317b..04a5d9d6f 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -12,4 +12,5 @@ pub struct RuntimeTogglableFeatures { #[derive(Default, Debug, Clone, Copy)] pub struct InstanceTogglableFeatures { pub metrics: bool, + pub logs_route: bool, } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index d612210c8..362f7a33f 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -538,7 +538,10 @@ impl Opt { } pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { - InstanceTogglableFeatures { metrics: self.experimental_enable_metrics } + InstanceTogglableFeatures { + metrics: self.experimental_enable_metrics, + logs_route: self.experimental_enable_logs_route, + } } } diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 632180aac..e8061ae4a 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1847,6 +1847,7 @@ async fn import_dump_v6_containing_experimental_features() { { "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index a8147f111..3a9812f30 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -20,6 +20,7 @@ async fn experimental_features() { { "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -31,6 +32,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -42,6 +44,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -54,6 +57,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -66,6 +70,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -85,6 +90,7 @@ async fn 
experimental_feature_metrics() { { "vectorStore": false, "metrics": true, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -140,7 +146,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index 965b68d17..b6c4605ed 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -96,3 +96,30 @@ async fn logs_bad_mode() { } "###); } + +#[actix_rt::test] +async fn logs_without_enabling_the_route() { + let server = Server::new().await; + + let (response, code) = server.service.post("/logs", json!({})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs` route requires enabling the `logs route` experimental feature. See https://github.com/meilisearch/product/discussions/625", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + + let (response, code) = server.service.delete("/logs").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs` route requires enabling the `logs route` experimental feature. See https://github.com/meilisearch/product/discussions/625", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 4aba0a4cd..ad1fa4048 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -16,8 +16,12 @@ use crate::json; #[actix_web::test] async fn basic_test_log_route() { let db_path = tempfile::tempdir().unwrap(); - let server = - Server::new_with_options(Opt { ..default_settings(db_path.path()) }).await.unwrap(); + let server = Server::new_with_options(Opt { + experimental_enable_logs_route: true, + ..default_settings(db_path.path()) + }) + .await + .unwrap(); let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(None.with_filter( diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index d3e556ab3..85bc96d86 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -15,6 +15,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde { "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); From f158e96fe7cb84ad1554bfe5f71a76d24341cdbf Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 5 Feb 2024 14:14:13 +0100 Subject: [PATCH 49/87] fix the auth --- meilisearch/src/routes/logs.rs | 4 ++-- meilisearch/tests/auth/authorization.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index bca944d76..c22ca2129 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -213,7 +213,7 @@ fn entry_stream( } pub async fn get_logs( - index_scheduler: GuardedData, Data>, + 
index_scheduler: GuardedData, Data>, logs: Data, body: AwebJson, ) -> Result { @@ -245,7 +245,7 @@ pub async fn get_logs( } pub async fn cancel_logs( - index_scheduler: GuardedData, Data>, + index_scheduler: GuardedData, Data>, logs: Data, ) -> Result { index_scheduler.features().check_logs_route()?; diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index af028060d..88635e62f 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy hashset!{"snapshots.create", "snapshots.*", "*"}, ("GET", "/version") => hashset!{"version", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, + ("POST", "/logs") => hashset!{"metrics.get", "metrics.*", "*"}, + ("DELETE", "/logs") => hashset!{"metrics.get", "metrics.*", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, From e773dfa9bab483931e9375a98291107c8b0655db Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 6 Feb 2024 10:49:23 +0100 Subject: [PATCH 50/87] get rids of log in milli and add logs for the bucket sort --- Cargo.lock | 1 - milli/Cargo.toml | 1 - milli/src/search/mod.rs | 2 +- milli/src/search/new/bucket_sort.rs | 4 ++++ milli/src/update/facet/mod.rs | 2 +- milli/src/update/index_documents/enrich.rs | 2 +- .../extract/extract_fid_docid_facet_values.rs | 2 +- .../extract/extract_vector_points.rs | 4 ++-- milli/src/update/index_documents/extract/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 2 +- milli/src/update/index_documents/typed_chunk.rs | 2 +- milli/src/update/words_prefix_integer_docids.rs | 2 +- milli/src/vector/hf.rs | 2 +- milli/src/vector/openai.rs | 14 +++++++++----- 14 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f15047cb..4ad454e80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3813,7 +3813,6 @@ dependencies = [ "json-depth-checker", "levenshtein_automata", "liquid", - "log", "logging_timer", "maplit", "md5", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 10ad2fb8f..4bc05d2cc 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -71,7 +71,6 @@ itertools = "0.11.0" puffin = "0.16.0" # logging -log = "0.4.20" logging_timer = "1.1.0" csv = "1.3.0" candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7bac5ea0c..e411bd032 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -6,9 +6,9 @@ use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; -use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; +use tracing::error; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index b439b87ec..e7bafaf70 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( continue; } + let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id()); + let entered = span.enter(); + let 
Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( ctx, logger, @@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( back!(); continue; }; + drop(entered); ranking_rule_scores.push(next_bucket.score); diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index ad8a838c8..400507c97 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption}; use grenad::{CompressionType, SortAlgorithm}; use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::BytesEncode; -use log::debug; use time::OffsetDateTime; +use tracing::debug; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 82ea335ae..162136912 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -78,7 +78,7 @@ pub fn enrich_documents_batch( }, [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), [(field_id, name)] => { - log::info!("Primary key was not specified in index. Inferred to '{name}'"); + tracing::info!("Primary key was not specified in index. Inferred to '{name}'"); PrimaryKey::Flat { name, field_id: *field_id } } multiple => { diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 2449e01cd..1f8af372d 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues { if let Ok(float) = original.parse() { output_numbers.push(float); } else { - log::warn!( + tracing::warn!( "Internal error, could not parse a geofield that has been validated. Please open an issue." 
) } diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 0bf7333e3..117f6cc8c 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -186,12 +186,12 @@ pub fn extract_vector_points( prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default(); let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?; if old_prompt != new_prompt { - log::trace!( + tracing::trace!( "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" ); VectorStateDelta::NowGenerated(new_prompt) } else { - log::trace!("⏭️ Prompt unmodified, skipping"); + tracing::trace!("⏭️ Prompt unmodified, skipping"); VectorStateDelta::NoChange } } else { diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index f0fd97965..357cdf8d7 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -14,8 +14,8 @@ use std::fs::File; use std::io::BufReader; use crossbeam_channel::Sender; -use log::debug; use rayon::prelude::*; +use tracing::debug; use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_facet_number_docids::extract_facet_number_docids; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index cbcde19fc..ca3d6bdd1 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -13,11 +13,11 @@ use std::result::Result as StdResult; use crossbeam_channel::{Receiver, Sender}; use heed::types::Str; use heed::Database; -use log::debug; use rand::SeedableRng; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use slice_group_by::GroupBy; +use tracing::debug; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 7db0279ba..37399dcd5 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index( } } - log::debug!("Finished vector chunk for {}", embedder_name); + tracing::debug!("Finished vector chunk for {}", embedder_name); } TypedChunk::ScriptLanguageDocids(sl_map) => { let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids"); diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 23a676bc8..a05eb8721 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -4,7 +4,7 @@ use std::str; use grenad::CompressionType; use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Database}; -use log::debug; +use tracing::debug; use crate::error::SerializationError; use crate::heed_codec::StrBEU16Codec; diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs index cdfdbfb75..04e169c71 100644 --- a/milli/src/vector/hf.rs +++ b/milli/src/vector/hf.rs @@ -73,7 +73,7 @@ impl Embedder { let device = match candle_core::Device::cuda_if_available(0) { Ok(device) => device, Err(error) => { - log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); + tracing::warn!("could not initialize CUDA device for Hugging 
Face embedder, defaulting to CPU: {}", error); candle_core::Device::Cpu } }; diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 104decb66..cbddddfb7 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -173,12 +173,16 @@ impl Embedder { let retry_duration = match result { Ok(embeddings) => return Ok(embeddings), Err(retry) => { - log::warn!("Failed: {}", retry.error); + tracing::warn!("Failed: {}", retry.error); tokenized |= retry.must_tokenize(); retry.into_duration(attempt) } }?; - log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis()); + tracing::warn!( + "Attempt #{}, retrying after {}ms.", + attempt, + retry_duration.as_millis() + ); tokio::time::sleep(retry_duration).await; } @@ -244,7 +248,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); + tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( error_response.error, @@ -266,7 +270,7 @@ impl Embedder { client: &reqwest::Client, ) -> Result>, Retry> { for text in texts { - log::trace!("Received prompt: {}", text.as_ref()) + tracing::trace!("Received prompt: {}", text.as_ref()) } let request = OpenAiRequest { model: self.options.embedding_model.name(), @@ -289,7 +293,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::trace!("response: {:?}", response.data); + tracing::trace!("response: {:?}", response.data); Ok(response .data From b393823f368811f6711820c37e499d26bd042a23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 6 Feb 2024 14:41:14 +0100 Subject: [PATCH 51/87] Replace stats_alloc with procfs --- Cargo.lock | 33 ++++-- meilisearch/Cargo.toml | 1 - meilisearch/src/main.rs | 5 - meilisearch/src/routes/logs.rs | 2 +- tracing-trace/Cargo.toml | 3 +- tracing-trace/src/entry.rs | 74 +++++------- tracing-trace/src/layer.rs | 45 ++------ .../src/processor/firefox_profiler.rs | 106 ++++-------------- tracing-trace/src/processor/fmt.rs | 22 +--- 9 files changed, 99 insertions(+), 192 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4ad454e80..8f8fd1ff1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3667,7 +3667,6 @@ dependencies = [ "siphasher 1.0.0", "slice-group-by", "static-files", - "stats_alloc", "sysinfo", "tar", "temp-env", @@ -4433,6 +4432,29 @@ dependencies = [ "rustix 0.36.16", ] +[[package]] +name = "procfs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" +dependencies = [ + "bitflags 2.4.1", + "hex", + "lazy_static", + "procfs-core", + "rustix 0.38.26", +] + +[[package]] +name = "procfs-core" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" +dependencies = [ + "bitflags 2.4.1", + "hex", +] + [[package]] name = "prometheus" version = "0.13.3" @@ -4445,7 +4467,7 @@ dependencies = [ "libc", "memchr", "parking_lot", - "procfs", + "procfs 0.14.2", "protobuf", "thiserror", ] @@ -5222,11 +5244,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "stats_alloc" -version = "0.1.10" -source = "git+https://github.com/Kerollmops/stats_alloc?branch=stable-const-fn-trait#6f83c52160c7d0550fdf770e1f73d239b0ff9a97" - [[package]] name = "strsim" version = "0.10.0" @@ -5705,9 +5722,9 @@ dependencies = [ "byte-unit", "color-spantrace", "fxprof-processed-profile", + "procfs 0.16.0", "serde", "serde_json", - "stats_alloc", "tokio", "tracing", "tracing-error", diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 21136b6e8..2a7b5ade1 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -107,7 +107,6 @@ url = { version = "2.5.0", features = ["serde"] } tracing = "0.1.40" tracing-subscriber = "0.3.18" tracing-trace = { version = "0.1.0", path = "../tracing-trace" } -stats_alloc = { git = "https://github.com/Kerollmops/stats_alloc", branch = "stable-const-fn-trait", optional = true } [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 94a28eb32..734f50de3 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -20,14 +20,9 @@ use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt as _; use tracing_subscriber::Layer; -#[cfg(not(feature = "stats_alloc"))] #[global_allocator] static ALLOC: MiMalloc = MiMalloc; -#[cfg(feature = "stats_alloc")] -#[global_allocator] -static ALLOC: stats_alloc::StatsAlloc = stats_alloc::StatsAlloc::new(MiMalloc); - fn default_layer() -> LogRouteType { None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF)) } diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index c22ca2129..a62f6d648 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -14,7 +14,7 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; -use tokio::sync::mpsc::{self}; +use tokio::sync::mpsc; use tracing_subscriber::filter::Targets; use tracing_subscriber::Layer; diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index da5e2b36c..4fe3ca735 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -13,10 +13,11 @@ serde_json = "1.0.111" tracing = "0.1.40" tracing-error = "0.2.0" tracing-subscriber = "0.3.18" -stats_alloc = { git = "https://github.com/Kerollmops/stats_alloc", branch = "stable-const-fn-trait" } byte-unit = { version = "4.0.19", default-features = false, features = [ "std", "serde", ] } tokio = { version = "1.35.1", features = ["sync"] } +[target.'cfg(target_os = "linux")'.dependencies] +procfs = { version = "0.16.0", default-features = false } diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index 61151b04c..f0136c18c 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -101,58 +101,46 @@ pub struct SpanClose { } /// A struct with a lot of memory allocation stats akin -/// to the `stats_alloc::Stats` one but implements the -/// `Serialize/Deserialize` serde traits. +/// to the `procfs::Process::StatsM` one plus the OOM score. +/// +/// Note that all the values are in bytes not in pages. 
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] pub struct MemoryStats { - pub allocations: usize, - pub deallocations: usize, - pub reallocations: usize, - pub bytes_allocated: usize, - pub bytes_deallocated: usize, - pub bytes_reallocated: isize, -} - -impl From for MemoryStats { - fn from(stats: stats_alloc::Stats) -> Self { - let stats_alloc::Stats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - } = stats; - MemoryStats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - } - } + /// Resident set size, measured in bytes. + /// (same as VmRSS in /proc//status). + pub resident: u64, + /// Number of resident shared bytes (i.e., backed by a file). + /// (same as RssFile+RssShmem in /proc//status). + pub shared: u64, + /// The current score that the kernel gives to this process + /// for the purpose of selecting a process for the OOM-killer + /// + /// A higher score means that the process is more likely to be selected + /// by the OOM-killer. The basis for this score is the amount of memory used + /// by the process, plus other factors. + /// + /// (Since linux 2.6.11) + pub oom_score: u32, } impl MemoryStats { + #[cfg(target_os = "linux")] + pub fn fetch() -> procfs::ProcResult { + let process = procfs::process::Process::myself().unwrap(); + let procfs::process::StatM { resident, shared, .. } = process.statm()?; + let oom_score = process.oom_score()?; + let page_size = procfs::page_size(); + + Ok(MemoryStats { resident: resident * page_size, shared: shared * page_size, oom_score }) + } + pub fn checked_sub(self, other: Self) -> Option { Some(Self { - allocations: self.allocations.checked_sub(other.allocations)?, - deallocations: self.deallocations.checked_sub(other.deallocations)?, - reallocations: self.reallocations.checked_sub(other.reallocations)?, - bytes_allocated: self.bytes_allocated.checked_sub(other.bytes_allocated)?, - bytes_deallocated: self.bytes_deallocated.checked_sub(other.bytes_deallocated)?, - bytes_reallocated: self.bytes_reallocated.checked_sub(other.bytes_reallocated)?, + resident: self.resident.checked_sub(other.resident)?, + shared: self.shared.checked_sub(other.shared)?, + oom_score: self.oom_score.checked_sub(other.oom_score)?, }) } - - pub fn usage(&self) -> isize { - (self.bytes_allocated - self.bytes_deallocated) as isize + self.bytes_reallocated - } - - pub fn operations(&self) -> usize { - self.allocations + self.deallocations + self.reallocations - } } #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index aa2908304..96690ff1f 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -1,11 +1,9 @@ -use std::alloc::{GlobalAlloc, System}; use std::borrow::Cow; use std::collections::HashMap; use std::io::Write; use std::ops::ControlFlow; use std::sync::RwLock; -use stats_alloc::StatsAlloc; use tracing::span::{Attributes, Id as TracingId}; use tracing::{Metadata, Subscriber}; use tracing_subscriber::layer::Context; @@ -18,55 +16,31 @@ use crate::entry::{ use crate::{Error, Trace, TraceWriter}; /// Layer that measures the time spent in spans. 
-pub struct TraceLayer { +pub struct TraceLayer { sender: tokio::sync::mpsc::UnboundedSender, callsites: RwLock>, start_time: std::time::Instant, - memory_allocator: Option<&'static StatsAlloc>, } impl Trace { - pub fn new() -> (Self, TraceLayer) { + pub fn new() -> (Self, TraceLayer) { let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let trace = Trace { receiver }; let layer = TraceLayer { sender, callsites: Default::default(), start_time: std::time::Instant::now(), - memory_allocator: None, - }; - (trace, layer) - } - - pub fn with_stats_alloc( - stats_alloc: &'static StatsAlloc, - ) -> (Self, TraceLayer) { - let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); - let trace = Trace { receiver }; - let layer = TraceLayer { - sender, - callsites: Default::default(), - start_time: std::time::Instant::now(), - memory_allocator: Some(stats_alloc), }; (trace, layer) } } impl TraceWriter { - pub fn new(writer: W) -> (Self, TraceLayer) { + pub fn new(writer: W) -> (Self, TraceLayer) { let (trace, layer) = Trace::new(); (trace.into_writer(writer), layer) } - pub fn with_stats_alloc( - writer: W, - stats_alloc: &'static StatsAlloc, - ) -> (Self, TraceLayer) { - let (trace, layer) = Trace::with_stats_alloc(stats_alloc); - (trace.into_writer(writer), layer) - } - pub async fn receive(&mut self) -> Result, Error> { let Some(entry) = self.receiver.recv().await else { return Ok(ControlFlow::Break(())); @@ -107,7 +81,7 @@ enum OpaqueIdentifier { Call(tracing::callsite::Identifier), } -impl TraceLayer { +impl TraceLayer { fn resource_id(&self, opaque: OpaqueIdentifier) -> Option { self.callsites.read().unwrap().get(&opaque).copied() } @@ -122,8 +96,14 @@ impl TraceLayer { self.start_time.elapsed() } + #[cfg(target_os = "linux")] fn memory_stats(&self) -> Option { - self.memory_allocator.map(|ma| ma.stats().into()) + Some(MemoryStats::fetch().unwrap()) + } + + #[cfg(not(target_os = "linux"))] + fn memory_stats(&self) -> Option { + None } fn send(&self, entry: Entry) { @@ -160,10 +140,9 @@ impl TraceLayer { } } -impl Layer for TraceLayer +impl Layer for TraceLayer where S: Subscriber, - A: GlobalAlloc, { fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) { let call_id = self diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index 126b4af1a..5daf202bd 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -227,8 +227,8 @@ fn add_memory_samples( profile.add_counter_sample( memory_counters.usage, last_timestamp, - stats.usage() as f64 - last_memory.usage() as f64, - stats.operations().checked_sub(last_memory.operations()).unwrap_or_default() as u32, + stats.resident as f64 - last_memory.resident as f64, + 0, ); let delta = stats.checked_sub(*last_memory); @@ -317,39 +317,21 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { searchable: true, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "allocations", - label: "Number of allocation operations while this function was executing", - format: MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "deallocations", - label: "Number of deallocation operations while this function was executing", - format: MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "reallocations", - label: "Number of reallocation operations while this function was executing", - format: 
MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "allocated_bytes", - label: "Number of allocated bytes while this function was executing", + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", format: MarkerFieldFormat::Bytes, searchable: false, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "deallocated_bytes", - label: "Number of deallocated bytes while this function was executing", + key: "shared", + label: "Number of resident shared pages (i.e., backed by a file) while this function was executing", format: MarkerFieldFormat::Bytes, searchable: false, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "reallocated_bytes", - label: "Number of reallocated bytes while this function was executing", - format: MarkerFieldFormat::Bytes, + key: "oom_score", + label: "The current score that the kernel gives to this process for the purpose of selecting a process for the OOM-killer while this function was executing", + format: MarkerFieldFormat::Integer, searchable: false, }), ]; @@ -384,21 +366,10 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { "thread_id": thread_id, }); - if let Some(MemoryStats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - }) = self.memory_delta - { - value["allocations"] = json!(allocations); - value["deallocations"] = json!(deallocations); - value["reallocations"] = json!(reallocations); - value["allocated_bytes"] = json!(bytes_allocated); - value["deallocated_bytes"] = json!(bytes_deallocated); - value["reallocated_bytes"] = json!(bytes_reallocated); + if let Some(MemoryStats { resident, shared, oom_score }) = self.memory_delta { + value["resident"] = json!(resident); + value["shared"] = json!(shared); + value["oom_score"] = json!(oom_score); } value @@ -447,39 +418,21 @@ impl<'a> ProfilerMarker for EventMarker<'a> { searchable: true, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "allocations", - label: "Number of allocation operations since last measure", - format: MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "deallocations", - label: "Number of deallocation operations since last measure", - format: MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "reallocations", - label: "Number of reallocation operations since last measure", - format: MarkerFieldFormat::Integer, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "allocated_bytes", - label: "Number of allocated bytes since last measure", + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", format: MarkerFieldFormat::Bytes, searchable: false, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "deallocated_bytes", - label: "Number of deallocated bytes since last measure", + key: "shared", + label: "Number of resident shared pages (i.e., backed by a file) while this function was executing", format: MarkerFieldFormat::Bytes, searchable: false, }), MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "reallocated_bytes", - label: "Number of reallocated bytes since last measure", - format: MarkerFieldFormat::Bytes, + key: "oom_score", + label: "The current score that the kernel gives to this process for the purpose of selecting a process for the OOM-killer while this function was executing", + 
format: MarkerFieldFormat::Integer, searchable: false, }), ]; @@ -514,21 +467,10 @@ impl<'a> ProfilerMarker for EventMarker<'a> { "thread_id": thread_id, }); - if let Some(MemoryStats { - allocations, - deallocations, - reallocations, - bytes_allocated, - bytes_deallocated, - bytes_reallocated, - }) = self.memory_delta - { - value["allocations"] = json!(allocations); - value["deallocations"] = json!(deallocations); - value["reallocations"] = json!(reallocations); - value["allocated_bytes"] = json!(bytes_allocated); - value["deallocated_bytes"] = json!(bytes_deallocated); - value["reallocated_bytes"] = json!(bytes_reallocated); + if let Some(MemoryStats { resident, shared, oom_score }) = self.memory_delta { + value["resident"] = json!(resident); + value["shared"] = json!(shared); + value["oom_score"] = json!(oom_score); } value diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index 166930dfc..8c6af1640 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -188,23 +188,9 @@ fn print_duration(duration: std::time::Duration) -> String { } /// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes. -fn print_memory(memory: MemoryStats) -> String { +fn print_memory(MemoryStats { resident, shared, oom_score }: MemoryStats) -> String { use byte_unit::Byte; - - let allocated_bytes = Byte::from_bytes(memory.bytes_allocated.try_into().unwrap()); - let deallocated_bytes = Byte::from_bytes(memory.bytes_deallocated.try_into().unwrap()); - - let reallocated_sign = if memory.bytes_reallocated < 0 { "-" } else { "" }; - let reallocated_bytes = - Byte::from_bytes(memory.bytes_reallocated.abs_diff(0).try_into().unwrap()); - - let adjusted_allocated_bytes = allocated_bytes.get_appropriate_unit(true); - let adjusted_deallocated_bytes = deallocated_bytes.get_appropriate_unit(true); - let adjusted_reallocated_bytes = reallocated_bytes.get_appropriate_unit(true); - - format!( - "Allocated {adjusted_allocated_bytes:.2}, \ - Deallocated {adjusted_deallocated_bytes:.2}, \ - Reallocated {reallocated_sign}{adjusted_reallocated_bytes:.2}" - ) + let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true); + let shared_bytes = Byte::from_bytes(shared).get_appropriate_unit(true); + format!("RSS {rss_bytes:.2}, Shared {shared_bytes:.2}, OOM score {oom_score}") } From bc097d90cb48c0408f8a7beb52e6a2f1597b2338 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 1 Feb 2024 16:48:08 +0100 Subject: [PATCH 52/87] tracing-trace: Spanstats deserializable + public fields --- tracing-trace/src/processor/span_stats.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs index 63b6ae5c1..0d6d2f4e2 100644 --- a/tracing-trace/src/processor/span_stats.rs +++ b/tracing-trace/src/processor/span_stats.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, HashMap}; use std::time::Duration; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit}; use crate::{Error, TraceReader}; @@ -12,10 +12,10 @@ enum SpanStatus { Inside(std::time::Duration), } -#[derive(Serialize)] +#[derive(Serialize, Deserialize)] pub struct CallStats { - nb: usize, - ns: u64, + pub nb: usize, + pub ns: u64, } pub fn to_call_stats( From d78ada07b5594d6082f49947edcaed1b35fc0b77 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 5 Feb 2024 17:38:50 +0100 Subject: 
[PATCH 53/87] spanstats: change field names --- tracing-trace/src/processor/span_stats.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs index 0d6d2f4e2..f3e6238ff 100644 --- a/tracing-trace/src/processor/span_stats.rs +++ b/tracing-trace/src/processor/span_stats.rs @@ -14,8 +14,8 @@ enum SpanStatus { #[derive(Serialize, Deserialize)] pub struct CallStats { - pub nb: usize, - pub ns: u64, + pub call_count: usize, + pub time: u64, } pub fn to_call_stats( @@ -75,5 +75,5 @@ fn site_to_string(call_site: NewCallsite) -> String { fn calls_to_stats(calls: Vec) -> CallStats { let nb = calls.len(); let sum: Duration = calls.iter().sum(); - CallStats { nb, ns: sum.as_nanos() as u64 } + CallStats { call_count: nb, time: sum.as_nanos() as u64 } } From 02dcaf07dbdf7e8b117cedda146a0b39d6fae60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 6 Feb 2024 18:12:04 +0100 Subject: [PATCH 54/87] Replace the procfs by libproc --- Cargo.lock | 106 +++++++++++++----- tracing-trace/Cargo.toml | 4 +- tracing-trace/src/entry.rs | 39 ++----- tracing-trace/src/layer.rs | 4 +- .../src/processor/firefox_profiler.rs | 32 +----- tracing-trace/src/processor/fmt.rs | 5 +- 6 files changed, 100 insertions(+), 90 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f8fd1ff1..21d749727 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -535,6 +535,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.68.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags 2.4.1", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.48", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -831,6 +851,15 @@ dependencies = [ "smallvec", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -915,6 +944,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.4.17" @@ -3119,6 +3159,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -3146,6 +3192,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.2.7" @@ -3162,6 +3218,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libproc" +version = "0.14.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef" +dependencies = [ + "bindgen", + "errno", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.12" @@ -4177,6 +4244,12 @@ dependencies = [ "sha2", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "pem" version = "1.1.1" @@ -4432,29 +4505,6 @@ dependencies = [ "rustix 0.36.16", ] -[[package]] -name = "procfs" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" -dependencies = [ - "bitflags 2.4.1", - "hex", - "lazy_static", - "procfs-core", - "rustix 0.38.26", -] - -[[package]] -name = "procfs-core" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" -dependencies = [ - "bitflags 2.4.1", - "hex", -] - [[package]] name = "prometheus" version = "0.13.3" @@ -4467,7 +4517,7 @@ dependencies = [ "libc", "memchr", "parking_lot", - "procfs 0.14.2", + "procfs", "protobuf", "thiserror", ] @@ -5079,6 +5129,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -5722,7 +5778,7 @@ dependencies = [ "byte-unit", "color-spantrace", "fxprof-processed-profile", - "procfs 0.16.0", + "libproc", "serde", "serde_json", "tokio", diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml index 4fe3ca735..64848bff9 100644 --- a/tracing-trace/Cargo.toml +++ b/tracing-trace/Cargo.toml @@ -19,5 +19,5 @@ byte-unit = { version = "4.0.19", default-features = false, features = [ ] } tokio = { version = "1.35.1", features = ["sync"] } -[target.'cfg(target_os = "linux")'.dependencies] -procfs = { version = "0.16.0", default-features = false } +[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] +libproc = "0.14.2" diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index f0136c18c..29cf9326c 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -100,46 +100,29 @@ pub struct SpanClose { pub time: std::time::Duration, } -/// A struct with a lot of memory allocation stats akin -/// to the `procfs::Process::StatsM` one plus the OOM score. -/// -/// Note that all the values are in bytes not in pages. +/// A struct with a memory allocation stat. #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] pub struct MemoryStats { /// Resident set size, measured in bytes. /// (same as VmRSS in /proc//status). pub resident: u64, - /// Number of resident shared bytes (i.e., backed by a file). - /// (same as RssFile+RssShmem in /proc//status). - pub shared: u64, - /// The current score that the kernel gives to this process - /// for the purpose of selecting a process for the OOM-killer - /// - /// A higher score means that the process is more likely to be selected - /// by the OOM-killer. The basis for this score is the amount of memory used - /// by the process, plus other factors. 
- /// - /// (Since linux 2.6.11) - pub oom_score: u32, } impl MemoryStats { - #[cfg(target_os = "linux")] - pub fn fetch() -> procfs::ProcResult { - let process = procfs::process::Process::myself().unwrap(); - let procfs::process::StatM { resident, shared, .. } = process.statm()?; - let oom_score = process.oom_score()?; - let page_size = procfs::page_size(); + #[cfg(any(target_os = "linux", target_os = "macos"))] + pub fn fetch() -> Option { + use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0}; - Ok(MemoryStats { resident: resident * page_size, shared: shared * page_size, oom_score }) + match pidrusage(std::process::id() as i32) { + Ok(RUsageInfoV0 { ri_resident_size, .. }) => { + Some(MemoryStats { resident: ri_resident_size }) + } + Err(_) => None, /* ignoring error to avoid spamming */ + } } pub fn checked_sub(self, other: Self) -> Option { - Some(Self { - resident: self.resident.checked_sub(other.resident)?, - shared: self.shared.checked_sub(other.shared)?, - oom_score: self.oom_score.checked_sub(other.oom_score)?, - }) + Some(Self { resident: self.resident.checked_sub(other.resident)? }) } } diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index 96690ff1f..a2d3232c8 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -96,12 +96,12 @@ impl TraceLayer { self.start_time.elapsed() } - #[cfg(target_os = "linux")] + #[cfg(any(target_os = "linux", target_os = "macos"))] fn memory_stats(&self) -> Option { Some(MemoryStats::fetch().unwrap()) } - #[cfg(not(target_os = "linux"))] + #[cfg(not(any(target_os = "linux", target_os = "macos")))] fn memory_stats(&self) -> Option { None } diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs index 5daf202bd..bae8ea44a 100644 --- a/tracing-trace/src/processor/firefox_profiler.rs +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -322,18 +322,6 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { format: MarkerFieldFormat::Bytes, searchable: false, }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "shared", - label: "Number of resident shared pages (i.e., backed by a file) while this function was executing", - format: MarkerFieldFormat::Bytes, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "oom_score", - label: "The current score that the kernel gives to this process for the purpose of selecting a process for the OOM-killer while this function was executing", - format: MarkerFieldFormat::Integer, - searchable: false, - }), ]; MarkerSchema { @@ -366,10 +354,8 @@ impl<'a> ProfilerMarker for SpanMarker<'a> { "thread_id": thread_id, }); - if let Some(MemoryStats { resident, shared, oom_score }) = self.memory_delta { + if let Some(MemoryStats { resident }) = self.memory_delta { value["resident"] = json!(resident); - value["shared"] = json!(shared); - value["oom_score"] = json!(oom_score); } value @@ -423,18 +409,6 @@ impl<'a> ProfilerMarker for EventMarker<'a> { format: MarkerFieldFormat::Bytes, searchable: false, }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "shared", - label: "Number of resident shared pages (i.e., backed by a file) while this function was executing", - format: MarkerFieldFormat::Bytes, - searchable: false, - }), - MarkerSchemaField::Dynamic(MarkerDynamicField { - key: "oom_score", - label: "The current score that the kernel gives to this process for the purpose of selecting a process for the OOM-killer while this function was executing", - format: 
MarkerFieldFormat::Integer, - searchable: false, - }), ]; MarkerSchema { @@ -467,10 +441,8 @@ impl<'a> ProfilerMarker for EventMarker<'a> { "thread_id": thread_id, }); - if let Some(MemoryStats { resident, shared, oom_score }) = self.memory_delta { + if let Some(MemoryStats { resident }) = self.memory_delta { value["resident"] = json!(resident); - value["shared"] = json!(shared); - value["oom_score"] = json!(oom_score); } value diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs index 8c6af1640..68e95c00c 100644 --- a/tracing-trace/src/processor/fmt.rs +++ b/tracing-trace/src/processor/fmt.rs @@ -188,9 +188,8 @@ fn print_duration(duration: std::time::Duration) -> String { } /// Format only the allocated bytes, deallocated bytes and reallocated bytes in GiB, MiB, KiB, Bytes. -fn print_memory(MemoryStats { resident, shared, oom_score }: MemoryStats) -> String { +fn print_memory(MemoryStats { resident }: MemoryStats) -> String { use byte_unit::Byte; let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true); - let shared_bytes = Byte::from_bytes(shared).get_appropriate_unit(true); - format!("RSS {rss_bytes:.2}, Shared {shared_bytes:.2}, OOM score {oom_score}") + format!("RSS {rss_bytes:.2}") } From 661baa716bad6cc16c3132dd242ba5e95aa1c5cd Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 6 Feb 2024 18:05:02 +0100 Subject: [PATCH 55/87] logs route profile mode: don't barf bytes if the buffer is not empty --- meilisearch/src/routes/logs.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index a62f6d648..48bf181be 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -186,6 +186,10 @@ fn entry_stream( }; if count == 0 { + if !bytes.is_empty() { + break; + } + // channel closed, exit return None; } From 4de2db6786a2b314fdae9d32baf5a3a450ceffc3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 09:20:18 +0100 Subject: [PATCH 56/87] add back the actix-web logs --- Cargo.lock | 40 ++++++++++++++++++++++++++++++++++++++++ meilisearch/Cargo.toml | 1 + meilisearch/src/lib.rs | 2 +- meilisearch/src/main.rs | 2 +- 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21d749727..dab12a56b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3745,6 +3745,7 @@ dependencies = [ "tokio-stream", "toml", "tracing", + "tracing-actix-web", "tracing-subscriber", "tracing-trace", "url", @@ -3984,6 +3985,12 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "mutually_exclusive_features" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" + [[package]] name = "nom" version = "7.1.3" @@ -4369,6 +4376,26 @@ dependencies = [ "siphasher 0.3.11", ] +[[package]] +name = "pin-project" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -5715,6 +5742,19 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-actix-web" 
+version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "pin-project", + "tracing", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.27" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 2a7b5ade1..60b91207e 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -107,6 +107,7 @@ url = { version = "2.5.0", features = ["serde"] } tracing = "0.1.40" tracing-subscriber = "0.3.18" tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +tracing-actix-web = "0.7.9" [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 2ebed39a3..56149ec6c 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -136,7 +136,7 @@ pub fn create_app( .allow_any_method() .max_age(86_400), // 24h ) - .wrap(actix_web::middleware::Logger::default()) + .wrap(tracing_actix_web::TracingLogger::default()) .wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) } diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 734f50de3..ccbe761fe 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -35,7 +35,7 @@ fn setup(opt: &Opt) -> anyhow::Result { let subscriber = tracing_subscriber::registry().with(route_layer).with( tracing_subscriber::fmt::layer() .with_line_number(true) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::NEW) .with_filter( tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) .unwrap(), From f3c34d5b8c6b227da3694eb2e587144d29d7af06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 7 Feb 2024 10:09:39 +0100 Subject: [PATCH 57/87] Simplify MemoryStats fetching --- tracing-trace/src/entry.rs | 5 +++++ tracing-trace/src/layer.rs | 8 +------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs index 29cf9326c..26e543ba0 100644 --- a/tracing-trace/src/entry.rs +++ b/tracing-trace/src/entry.rs @@ -121,6 +121,11 @@ impl MemoryStats { } } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + pub fn fetch() -> Option { + None + } + pub fn checked_sub(self, other: Self) -> Option { Some(Self { resident: self.resident.checked_sub(other.resident)? 
}) } diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index a2d3232c8..b30e1ad38 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -96,14 +96,8 @@ impl TraceLayer { self.start_time.elapsed() } - #[cfg(any(target_os = "linux", target_os = "macos"))] fn memory_stats(&self) -> Option { - Some(MemoryStats::fetch().unwrap()) - } - - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - fn memory_stats(&self) -> Option { - None + MemoryStats::fetch() } fn send(&self, entry: Entry) { From ceb211c5158448685f2aae040200e56999e9c493 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 12:13:57 +0100 Subject: [PATCH 58/87] move the /logs route to the /logs/stream route --- meilisearch/src/routes/logs.rs | 2 +- meilisearch/tests/auth/authorization.rs | 4 ++-- meilisearch/tests/logs/error.rs | 24 ++++++++++++------------ meilisearch/tests/logs/mod.rs | 8 ++++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 48bf181be..7ce7d7ea1 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -26,7 +26,7 @@ use crate::LogRouteHandle; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( - web::resource("") + web::resource("stream") .route(web::post().to(SeqHandler(get_logs))) .route(web::delete().to(SeqHandler(cancel_logs))), ); diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index 88635e62f..d26bb26b8 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -59,8 +59,8 @@ pub static AUTHORIZATIONS: Lazy hashset!{"snapshots.create", "snapshots.*", "*"}, ("GET", "/version") => hashset!{"version", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, - ("POST", "/logs") => hashset!{"metrics.get", "metrics.*", "*"}, - ("DELETE", "/logs") => hashset!{"metrics.get", "metrics.*", "*"}, + ("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, + ("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index b6c4605ed..c1755c299 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -4,11 +4,11 @@ use crate::common::Server; use crate::json; #[actix_rt::test] -async fn logs_bad_target() { +async fn logs_stream_bad_target() { let server = Server::new().await; // Wrong type - let (response, code) = server.service.post("/logs", json!({ "target": true })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -20,7 +20,7 @@ async fn logs_bad_target() { "###); // Wrong type - let (response, code) = server.service.post("/logs", json!({ "target": [] })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -32,7 +32,7 @@ async fn logs_bad_target() { "###); // Our help message - let (response, code) = server.service.post("/logs", json!({ "target": "" })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, 
@r###" { @@ -44,7 +44,7 @@ async fn logs_bad_target() { "###); // An error from the target parser - let (response, code) = server.service.post("/logs", json!({ "target": "==" })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -57,11 +57,11 @@ async fn logs_bad_target() { } #[actix_rt::test] -async fn logs_bad_mode() { +async fn logs_stream_bad_mode() { let server = Server::new().await; // Wrong type - let (response, code) = server.service.post("/logs", json!({ "mode": true })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -73,7 +73,7 @@ async fn logs_bad_mode() { "###); // Wrong type - let (response, code) = server.service.post("/logs", json!({ "mode": [] })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -85,7 +85,7 @@ async fn logs_bad_mode() { "###); // Wrong value - let (response, code) = server.service.post("/logs", json!({ "mode": "tamo" })).await; + let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -98,10 +98,10 @@ async fn logs_bad_mode() { } #[actix_rt::test] -async fn logs_without_enabling_the_route() { +async fn logs_stream_without_enabling_the_route() { let server = Server::new().await; - let (response, code) = server.service.post("/logs", json!({})).await; + let (response, code) = server.service.post("/logs/stream", json!({})).await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { @@ -112,7 +112,7 @@ async fn logs_without_enabling_the_route() { } "###); - let (response, code) = server.service.delete("/logs").await; + let (response, code) = server.service.delete("/logs/stream").await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index ad1fa4048..f9331166d 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -14,7 +14,7 @@ use crate::common::{default_settings, Server}; use crate::json; #[actix_web::test] -async fn basic_test_log_route() { +async fn basic_test_log_stream_route() { let db_path = tempfile::tempdir().unwrap(); let server = Server::new_with_options(Opt { experimental_enable_logs_route: true, @@ -50,11 +50,11 @@ async fn basic_test_log_route() { let app = Rc::new(app); - // First, we start listening on the `/logs` route + // First, we start listening on the `/logs/stream` route let handle_app = app.clone(); let handle = tokio::task::spawn_local(async move { let req = actix_web::test::TestRequest::post() - .uri("/logs") + .uri("/logs/stream") .insert_header(ContentType::json()) .set_payload( serde_json::to_vec(&json!({ @@ -81,7 +81,7 @@ async fn basic_test_log_route() { "###); server.wait_task(ret.uid()).await; - let req = actix_web::test::TestRequest::delete().uri("/logs"); + let req = actix_web::test::TestRequest::delete().uri("/logs/stream"); let req = req.to_request(); let ret = actix_web::test::call_service(&*app, req).await; let code = ret.status(); From bcf7909bba38bd41f27791810bc1c2e03aa58f8b Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 14:45:40 +0100 Subject: [PATCH 59/87] add a profile_memory parameter disabled by default --- 
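The new `profileMemory` flag is only accepted together with `"mode": "profile"`: the validator added below rejects it for every other mode, including the default fmt mode. A quick sketch of the payload shapes, using the same `json!` macro as the tests in this patch; whether the accepted shape then actually streams anything also depends on the server having been started with `experimental_enable_logs_route: true`, as in the streaming test of this series:

    fn profile_memory_payload_sketch() {
        use serde_json::json;
        // Rejected with 400: profiling memory only makes sense in profile mode.
        let _rejected_default = json!({ "profileMemory": true });
        let _rejected_fmt = json!({ "mode": "fmt", "profileMemory": true });
        // Passes validate_get_logs; POST /logs/stream can then emit profile entries.
        let _accepted = json!({ "mode": "profile", "profileMemory": true });
    }
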
meilisearch/src/routes/logs.rs | 24 ++++++++++++-- meilisearch/tests/logs/error.rs | 57 +++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 7ce7d7ea1..ae93f0ea5 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -32,7 +32,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } -#[derive(Debug, Default, Clone, Copy, Deserr)] +#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)] #[deserr(rename_all = lowercase)] pub enum LogMode { #[default] @@ -82,13 +82,33 @@ impl MergeWithError for DeserrJsonError { } #[derive(Debug, Deserr)] -#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError)] pub struct GetLogs { #[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] target: MyTargets, #[deserr(default, error = DeserrJsonError)] mode: LogMode, + + #[deserr(default = false, error = DeserrJsonError)] + profile_memory: bool, +} + +fn validate_get_logs( + logs: GetLogs, + location: ValuePointerRef, +) -> Result { + if logs.profile_memory && logs.mode != LogMode::Profile { + Err(deserr::take_cf_content(E::error::( + None, + ErrorKind::Unexpected { + msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode), + }, + location, + ))) + } else { + Ok(logs) + } } struct LogWriter { diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index c1755c299..7cbc39b1f 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -97,6 +97,63 @@ async fn logs_stream_bad_mode() { "###); } +#[actix_rt::test] +async fn logs_stream_bad_profile_memory() { + let server = Server::new().await; + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Used with default parameters + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.", + "code": "invalid_settings_typo_tolerance", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" + } + "###); + + // Used with an unsupported mode + let (response, code) = + server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + 
snapshot!(response, @r###" + { + "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.", + "code": "invalid_settings_typo_tolerance", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" + } + "###); +} + #[actix_rt::test] async fn logs_stream_without_enabling_the_route() { let server = Server::new().await; From 7ff722b72e328837776299ca4dc9d566b4fa9985 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 15:51:38 +0100 Subject: [PATCH 60/87] get rids of the log dependencies everywhere --- Cargo.lock | 4 +-- dump/Cargo.toml | 2 +- dump/src/reader/compat/v1_to_v2.rs | 10 +++---- dump/src/reader/compat/v2_to_v3.rs | 10 +++---- dump/src/reader/compat/v3_to_v4.rs | 8 ++--- dump/src/reader/compat/v4_to_v5.rs | 2 +- dump/src/reader/compat/v5_to_v6.rs | 4 +-- dump/src/reader/v6/mod.rs | 2 +- index-scheduler/Cargo.toml | 1 - index-scheduler/src/index_mapper/mod.rs | 2 +- index-scheduler/src/lib.rs | 30 +++++++++---------- meilisearch/Cargo.toml | 1 - meilisearch/src/lib.rs | 21 ++++++------- meilisearch/src/routes/dump.rs | 2 +- meilisearch/src/routes/features.rs | 2 +- meilisearch/src/routes/indexes/documents.rs | 4 +-- .../src/routes/indexes/facet_search.rs | 2 +- meilisearch/src/routes/indexes/mod.rs | 2 +- meilisearch/src/routes/indexes/search.rs | 2 +- meilisearch/src/routes/indexes/settings.rs | 4 +-- meilisearch/src/routes/mod.rs | 2 +- meilisearch/src/routes/multi_search.rs | 2 +- meilisearch/src/routes/snapshot.rs | 2 +- 23 files changed, 59 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dab12a56b..32ad13772 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1543,7 +1543,6 @@ dependencies = [ "big_s", "flate2", "http 0.2.11", - "log", "maplit", "meili-snap", "meilisearch-auth", @@ -1557,6 +1556,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "uuid", ] @@ -2943,7 +2943,6 @@ dependencies = [ "file-store", "flate2", "insta", - "log", "meili-snap", "meilisearch-auth", "meilisearch-types", @@ -3701,7 +3700,6 @@ dependencies = [ "itertools 0.11.0", "jsonwebtoken", "lazy_static", - "log", "manifest-dir-macros", "maplit", "meili-snap", diff --git a/dump/Cargo.toml b/dump/Cargo.toml index 941cec72d..92830c782 100644 --- a/dump/Cargo.toml +++ b/dump/Cargo.toml @@ -14,7 +14,6 @@ license.workspace = true anyhow = "1.0.79" flate2 = "1.0.28" http = "0.2.11" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } once_cell = "1.19.0" @@ -26,6 +25,7 @@ tar = "0.4.40" tempfile = "3.9.0" thiserror = "1.0.56" time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tracing = "0.1.40" uuid = { version = "1.6.1", features = ["serde", "v4"] } [dev-dependencies] diff --git a/dump/src/reader/compat/v1_to_v2.rs b/dump/src/reader/compat/v1_to_v2.rs index 789e8e0b1..0d050497b 100644 --- a/dump/src/reader/compat/v1_to_v2.rs +++ b/dump/src/reader/compat/v1_to_v2.rs @@ -120,7 +120,7 @@ impl From for v2::Settings { criterion.as_ref().map(ToString::to_string) } Err(()) => { - log::warn!( + tracing::warn!( "Could not import the following ranking rule: `{}`.", ranking_rule ); @@ -152,11 +152,11 @@ impl From for Option { use v2::updates::UpdateStatus as UpdateStatusV2; Some(match source { UpdateStatusV1::Enqueued { content } => { - log::warn!( + tracing::warn!( "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", 
content.update_id ); - log::warn!("Task will be skipped in the queue of imported tasks."); + tracing::warn!("Task will be skipped in the queue of imported tasks."); return None; } @@ -229,7 +229,7 @@ impl From for Option { Some(match source { v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, v1::update::UpdateType::Customs => { - log::warn!("Ignoring task with type 'Customs' that is no longer supported"); + tracing::warn!("Ignoring task with type 'Customs' that is no longer supported"); return None; } v1::update::UpdateType::DocumentsAddition { .. } => { @@ -296,7 +296,7 @@ impl From for Option { v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), v1::settings::RankingRule::WordsPosition => { - log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); + tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); None } v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 1d5f4e153..1d4238290 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -146,8 +146,8 @@ impl From for v3::updates::UpdateStatus { started_processing_at: processing.started_processing_at, }), Err(e) => { - log::warn!("Error with task {}: {}", processing.from.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", processing.from.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -172,8 +172,8 @@ impl From for v3::updates::UpdateStatus { enqueued_at: enqueued.enqueued_at, }), Err(e) => { - log::warn!("Error with task {}: {}", enqueued.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", enqueued.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -353,7 +353,7 @@ impl From for v3::Code { "malformed_payload" => v3::Code::MalformedPayload, "missing_payload" => v3::Code::MissingPayload, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v3::Code::UnretrievableErrorCode } } diff --git a/dump/src/reader/compat/v3_to_v4.rs b/dump/src/reader/compat/v3_to_v4.rs index b4153eb31..244948200 100644 --- a/dump/src/reader/compat/v3_to_v4.rs +++ b/dump/src/reader/compat/v3_to_v4.rs @@ -76,20 +76,20 @@ impl CompatV3ToV4 { let index_uid = match index_uid { Some(uid) => uid, None => { - log::warn!( + tracing::warn!( "Error while importing the update {}.", task.update.id() ); - log::warn!( + tracing::warn!( "The index associated to the uuid `{}` could not be retrieved.", task.uuid.to_string() ); if task.update.is_finished() { // we're fucking with his history but not his data, that's ok-ish. 
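    // Illustrative sketch, not part of the patch: tracing's warn!/info!/debug!/error!
    // macros accept the same plain format-string calls as the log crate, so each of
    // these swaps keeps the message unchanged and only reroutes it to the tracing
    // subscriber configured in main.rs.
    fn log_to_tracing_sketch() {
        let code = "immutable_field";
        // Same call shape log::warn! accepted; only the backend differs.
        tracing::warn!("Unknown error code {}", code);
    }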
- log::warn!("The index-uuid will be set as `unknown`."); + tracing::warn!("The index-uuid will be set as `unknown`."); String::from("unknown") } else { - log::warn!("The task will be ignored."); + tracing::warn!("The task will be ignored."); return None; } } diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs index 850e2cccd..aa8441779 100644 --- a/dump/src/reader/compat/v4_to_v5.rs +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -305,7 +305,7 @@ impl From for v5::ResponseError { "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v5::Code::UnretrievableErrorCode } }; diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index 9351ae70d..e00d3a599 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -304,7 +304,7 @@ impl From for v6::ResponseError { "immutable_field" => v6::Code::BadRequest, "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v6::Code::UnretrievableErrorCode } }; @@ -329,7 +329,7 @@ impl From> for v6::Settings { new_ranking_rules.push(new_rule); } Err(_) => { - log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") + tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") } } } diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs index 4e980e03e..50b9751a2 100644 --- a/dump/src/reader/v6/mod.rs +++ b/dump/src/reader/v6/mod.rs @@ -2,10 +2,10 @@ use std::fs::{self, File}; use std::io::{BufRead, BufReader, ErrorKind}; use std::path::Path; -use log::debug; pub use meilisearch_types::milli; use tempfile::TempDir; use time::OffsetDateTime; +use tracing::debug; use uuid::Uuid; use super::Document; diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 4e7fd1b64..890312854 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -19,7 +19,6 @@ dump = { path = "../dump" } enum-iterator = "1.5.0" file-store = { path = "../file-store" } flate2 = "1.0.28" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 58ec2bf11..14908120c 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use std::{fs, thread}; -use log::error; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{FieldDistribution, Index}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use tracing::error; use uuid::Uuid; use self::index_map::IndexMap; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 7f66c9427..7514a2a68 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -535,17 +535,17 @@ impl IndexScheduler { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { DEFAULT_BUDGET } else { - log::debug!("determining budget with dichotomic search"); + 
tracing::debug!("determining budget with dichotomic search"); utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { Self::is_good_heed(tasks_path, map_size) }) }; - log::debug!("memmap budget: {budget}B"); + tracing::debug!("memmap budget: {budget}B"); let mut budget = budget / 2; if task_db_size > (budget / 2) { task_db_size = clamp_to_page_size(budget * 2 / 5); - log::debug!( + tracing::debug!( "Decreasing max size of task DB to {task_db_size}B due to constrained memory space" ); } @@ -555,13 +555,13 @@ impl IndexScheduler { let budget = budget; let task_db_size = task_db_size; - log::debug!("index budget: {budget}B"); + tracing::debug!("index budget: {budget}B"); let mut index_count = budget / base_map_size; if index_count < 2 { // take a bit less than half than the budget to make sure we can always afford to open an index let map_size = (budget * 2) / 5; // single index of max budget - log::debug!("1 index of {map_size}B can be opened simultaneously."); + tracing::debug!("1 index of {map_size}B can be opened simultaneously."); return IndexBudget { map_size, index_count: 1, task_db_size }; } // give us some space for an additional index when the cache is already full @@ -570,7 +570,7 @@ impl IndexScheduler { if index_count > max_index_count { index_count = max_index_count; } - log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); + tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); IndexBudget { map_size: base_map_size, index_count, task_db_size } } @@ -617,7 +617,7 @@ impl IndexScheduler { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); // Wait one second when an irrecoverable error occurs. if !e.is_recoverable() { std::thread::sleep(Duration::from_secs(1)); @@ -634,15 +634,15 @@ impl IndexScheduler { let mut file = match File::create(format!("{}.puffin", now)) { Ok(file) => file, Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); continue; } }; if let Err(e) = frame_view.save_to_writer(&mut file) { - log::error!("{e}"); + tracing::error!("{e}"); } if let Err(e) = file.sync_all() { - log::error!("{e}"); + tracing::error!("{e}"); } // We erase this frame view as it is no more useful. We want to // measure the new frames now that we exported the previous ones. @@ -1190,7 +1190,7 @@ impl IndexScheduler { self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } } tracing::info!("A batch of tasks was successfully completed."); @@ -1247,7 +1247,7 @@ impl IndexScheduler { self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. 
Error: {e}", task.uid); } self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; @@ -1341,7 +1341,7 @@ impl IndexScheduler { }; if let Err(e) = request.send(reader) { - log::error!("While sending data to the webhook: {e}"); + tracing::error!("While sending data to the webhook: {e}"); } } @@ -1367,12 +1367,12 @@ impl IndexScheduler { // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // the deletion tasks we enqueued ourselves. if to_delete.len() < 2 { - log::warn!("The task queue is almost full, but no task can be deleted yet."); + tracing::warn!("The task queue is almost full, but no task can be deleted yet."); // the only thing we can do is hope that the user tasks are going to finish return Ok(()); } - log::info!( + tracing::info!( "The task queue is almost full. Deleting the oldest {} finished tasks.", to_delete.len() ); diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 60b91207e..7fbabba87 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -54,7 +54,6 @@ is-terminal = "0.4.10" itertools = "0.11.0" jsonwebtoken = "8.3.0" lazy_static = "1.4.0" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } mimalloc = { version = "0.1.39", default-features = false } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 56149ec6c..2f7305365 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -29,7 +29,6 @@ use error::PayloadError; use extractors::payload::PayloadConfig; use http::header::CONTENT_TYPE; use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; -use log::error; use meilisearch_auth::AuthController; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; @@ -39,6 +38,7 @@ use meilisearch_types::versioning::{check_version_file, create_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; +use tracing::error; use tracing_subscriber::filter::Targets; use crate::error::MeilisearchHttpError; @@ -293,13 +293,13 @@ fn import_dump( let mut dump_reader = dump::DumpReader::open(reader)?; if let Some(date) = dump_reader.date() { - log::info!( + tracing::info!( "Importing a dump of meilisearch `{:?}` from the {}", dump_reader.version(), // TODO: get the meilisearch version instead of the dump version date ); } else { - log::info!( + tracing::info!( "Importing a dump of meilisearch `{:?}`", dump_reader.version(), // TODO: get the meilisearch version instead of the dump version ); @@ -335,7 +335,7 @@ fn import_dump( for index_reader in dump_reader.indexes()? { let mut index_reader = index_reader?; let metadata = index_reader.metadata(); - log::info!("Importing index `{}`.", metadata.uid); + tracing::info!("Importing index `{}`.", metadata.uid); let date = Some((metadata.created_at, metadata.updated_at)); let index = index_scheduler.create_raw_index(&metadata.uid, date)?; @@ -349,14 +349,15 @@ fn import_dump( } // 4.2 Import the settings. 
- log::info!("Importing the settings."); + tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; + builder + .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?; // 4.3 Import the documents. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. - log::info!("Importing the documents."); + tracing::info!("Importing the documents."); let file = tempfile::tempfile()?; let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); for document in index_reader.documents()? { @@ -378,15 +379,15 @@ fn import_dump( update_method: IndexDocumentsMethod::ReplaceDocuments, ..Default::default() }, - |indexing_step| log::trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!("update: {:?}", indexing_step), || false, )?; let (builder, user_result) = builder.add_documents(reader)?; - log::info!("{} documents found.", user_result?); + tracing::info!("{} documents found.", user_result?); builder.execute()?; wtxn.commit()?; - log::info!("All documents successfully imported."); + tracing::info!("All documents successfully imported."); } let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 0aabd2aa6..9f55a3a9a 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -1,7 +1,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; +use tracing::debug; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 375201a97..132460159 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::Deserr; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::ActionPolicy; diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 6d59f60dd..f1124ac3e 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -8,7 +8,6 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -28,6 +27,7 @@ use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; +use tracing::debug; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; @@ -427,7 +427,7 @@ async fn document_addition( Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} Err(e) => { - 
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); + tracing::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); } } // We still want to return the original error to the end user. diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 4b5d4d78a..dbd84281f 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -2,12 +2,12 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use serde_json::Value; +use tracing::debug; use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 81b5c3f2e..a071ff536 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -16,6 +15,7 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index c474d285e..061475798 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use index_scheduler::IndexScheduler; -use log::{debug, warn}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -12,6 +11,7 @@ use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; +use tracing::{debug, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index decc4ffc9..35f58e13c 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; @@ -11,6 +10,7 @@ use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, RankingRuleView, Settings, 
Unchecked}; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -24,12 +24,12 @@ macro_rules! make_setting_route { use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; use index_scheduler::IndexScheduler; - use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::tasks::KindWithContent; + use tracing::debug; use $crate::analytics::Analytics; use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 546c5e219..19feac324 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -3,7 +3,6 @@ use std::collections::BTreeMap; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::settings::{Settings, Unchecked}; @@ -11,6 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 8e81688e6..b09841fab 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -3,11 +3,11 @@ use actix_web::web::{self, Data}; use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; +use tracing::debug; use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::extractors::authentication::policies::ActionPolicy; diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 7fa22658a..001df2150 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -1,10 +1,10 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; From f70a615ed92eeaaf9748a9523525c41e5461e48b Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 15:56:47 +0100 Subject: [PATCH 61/87] update the github discussion links --- index-scheduler/src/features.rs | 3 +-- meilisearch/src/option.rs | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 9eaa658d9..2e311d1bd 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -50,8 +50,7 @@ impl RoFeatures { Err(FeatureNotEnabledError { disabled_action: "getting logs through the `/logs` route", feature: "logs route", - /// Update the discussion link - issue_link: "https://github.com/meilisearch/product/discussions/625", 
+ issue_link: "https://github.com/orgs/meilisearch/discussions/721", } .into()) } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 362f7a33f..3a9b634b1 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -310,8 +310,7 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, - // TODO: update the link - /// Experimental logs route feature. For more information, see: + /// Experimental logs route feature. For more information, see: /// /// Enables the log route on the `POST /logs` endpoint and the `DELETE /logs` to stop receiving logs. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] From 902d700a24f58669dfd20249d70a265ed5288315 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 15:48:07 +0100 Subject: [PATCH 62/87] Tracing trace: toggle the profiling of memory at runtime --- tracing-trace/src/layer.rs | 14 ++++++++++---- tracing-trace/src/main.rs | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs index b30e1ad38..f2f1d64ae 100644 --- a/tracing-trace/src/layer.rs +++ b/tracing-trace/src/layer.rs @@ -20,24 +20,26 @@ pub struct TraceLayer { sender: tokio::sync::mpsc::UnboundedSender, callsites: RwLock>, start_time: std::time::Instant, + profile_memory: bool, } impl Trace { - pub fn new() -> (Self, TraceLayer) { + pub fn new(profile_memory: bool) -> (Self, TraceLayer) { let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let trace = Trace { receiver }; let layer = TraceLayer { sender, callsites: Default::default(), start_time: std::time::Instant::now(), + profile_memory, }; (trace, layer) } } impl TraceWriter { - pub fn new(writer: W) -> (Self, TraceLayer) { - let (trace, layer) = Trace::new(); + pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) { + let (trace, layer) = Trace::new(profile_memory); (trace.into_writer(writer), layer) } @@ -97,7 +99,11 @@ impl TraceLayer { } fn memory_stats(&self) -> Option { - MemoryStats::fetch() + if self.profile_memory { + MemoryStats::fetch() + } else { + None + } } fn send(&self, entry: Entry) { diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs index c74bf1fb7..b9202727d 100644 --- a/tracing-trace/src/main.rs +++ b/tracing-trace/src/main.rs @@ -74,7 +74,7 @@ fn on_panic(info: &std::panic::PanicInfo) { fn main() { let (mut trace, profiling_layer) = - tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap()); + tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true); let subscriber = tracing_subscriber::registry() // any number of other subscriber layers may be added before or From 91eb67e981902ecddb3a2aac54c2dac518347802 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 15:48:21 +0100 Subject: [PATCH 63/87] logs route: make memory profiling toggling usable --- meilisearch/src/routes/logs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index ae93f0ea5..db80d976d 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -172,7 +172,7 @@ fn make_layer< (Box::new(fmt_layer) as Box + Send + Sync>, Box::pin(stream)) } LogMode::Profile => { - let (trace, layer) = tracing_trace::Trace::new(); + let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory); let stream = entry_stream(trace, guard); From db722d201a892f6ac2d4e435d57b32694cecc39f Mon Sep 17 00:00:00 2001 From: Louis 
Dureuil Date: Wed, 7 Feb 2024 15:57:26 +0100 Subject: [PATCH 64/87] Write entries into database downgraded to trace level --- milli/src/update/index_documents/typed_chunk.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 37399dcd5..af828fee6 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -575,7 +575,7 @@ fn merge_word_docids_reader_into_fst( /// Write provided entries in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. -#[tracing::instrument(skip_all, target = "indexing::write_db")] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn write_entries_into_database( data: grenad::Reader, database: &heed::Database, From c443ed7e3f48606fae8418a9d81c505a93d92089 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 16:42:30 +0100 Subject: [PATCH 65/87] delete inner .gitignore --- tracing-trace/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tracing-trace/.gitignore diff --git a/tracing-trace/.gitignore b/tracing-trace/.gitignore deleted file mode 100644 index ea8c4bf7f..000000000 --- a/tracing-trace/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/target From d71b77f18b1ea992b450808aff26384a4f93ea94 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 7 Feb 2024 17:29:40 +0100 Subject: [PATCH 66/87] Add panic hook to log panics --- meilisearch/src/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index ccbe761fe..f7a8e6cff 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -48,10 +48,17 @@ fn setup(opt: &Opt) -> anyhow::Result { Ok(route_layer_handle) } +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string().replace('\n', " "); + tracing::error!(%info); +} + #[actix_web::main] async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + std::panic::set_hook(Box::new(on_panic)); + anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" From 08af0e690c9becac11645e4f63a11f506abb4160 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 7 Feb 2024 17:55:40 +0100 Subject: [PATCH 67/87] Structures a bunch of logs --- meilisearch/src/lib.rs | 13 +++---- meilisearch/src/routes/dump.rs | 4 +-- meilisearch/src/routes/features.rs | 9 +++-- meilisearch/src/routes/indexes/documents.rs | 36 ++++++++++--------- .../src/routes/indexes/facet_search.rs | 6 ++-- meilisearch/src/routes/indexes/mod.rs | 16 +++++---- meilisearch/src/routes/indexes/search.rs | 10 +++--- meilisearch/src/routes/indexes/settings.rs | 18 +++++----- meilisearch/src/routes/mod.rs | 4 +-- meilisearch/src/routes/multi_search.rs | 6 ++-- meilisearch/src/routes/snapshot.rs | 4 +-- meilisearch/src/routes/tasks.rs | 1 - .../src/update/index_documents/extract/mod.rs | 10 +++--- milli/src/update/index_documents/mod.rs | 7 ++-- 14 files changed, 77 insertions(+), 67 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 2f7305365..711a11a0a 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -294,14 +294,14 @@ fn import_dump( if let Some(date) = dump_reader.date() { tracing::info!( - "Importing a dump of meilisearch `{:?}` from the {}", - dump_reader.version(), 
// TODO: get the meilisearch version instead of the dump version - date + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + %date, + "Importing a dump of meilisearch" ); } else { tracing::info!( - "Importing a dump of meilisearch `{:?}`", - dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + "Importing a dump of meilisearch", ); } @@ -384,7 +384,8 @@ fn import_dump( )?; let (builder, user_result) = builder.add_documents(reader)?; - tracing::info!("{} documents found.", user_result?); + let user_result = user_result?; + tracing::info!(documents_found = user_result, "{} documents found.", user_result); builder.execute()?; wtxn.commit()?; tracing::info!("All documents successfully imported."); diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 9f55a3a9a..b604985fc 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -1,11 +1,11 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use tracing::debug; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug_span; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -32,6 +32,6 @@ pub async fn create_dump( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Create dump", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 132460159..cc29b8995 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -7,7 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde_json::json; -use tracing::debug; +use tracing::{debug_span}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::ActionPolicy; @@ -33,8 +33,9 @@ async fn get_features( let features = index_scheduler.features(); analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); - debug!("returns: {:?}", features.runtime_features()); - HttpResponse::Ok().json(features.runtime_features()) + let features = features.runtime_features(); + debug_span!("Get features", returns = ?features); + HttpResponse::Ok().json(features) } #[derive(Debug, Deserr)] @@ -60,6 +61,7 @@ async fn patch_features( analytics: Data, ) -> Result { let features = index_scheduler.features(); + debug_span!("Patch features", parameters = ?new_features); let old_features = features.runtime_features(); let new_features = meilisearch_types::features::RuntimeTogglableFeatures { @@ -93,5 +95,6 @@ async fn patch_features( Some(&req), ); index_scheduler.put_runtime_features(new_features)?; + debug_span!("Patch features", returns = ?new_features); Ok(HttpResponse::Ok().json(new_features)) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index f1124ac3e..19d617c27 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -27,7 +27,7 @@ use serde_json::Value; use tempfile::tempfile; use 
tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; -use tracing::debug; +use tracing::{debug_span}; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; @@ -101,6 +101,7 @@ pub async fn get_document( analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); + debug_span!("Get document", parameters = ?params); let index_uid = IndexUid::try_from(index_uid)?; analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); @@ -110,7 +111,7 @@ pub async fn get_document( let index = index_scheduler.index(&index_uid)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; - debug!("returns: {:?}", document); + debug_span!("Get document", returns = ?document); Ok(HttpResponse::Ok().json(document)) } @@ -131,7 +132,7 @@ pub async fn delete_document( }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Delete document", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -168,9 +169,8 @@ pub async fn documents_by_query_post( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with body: {:?}", body); - let body = body.into_inner(); + debug_span!("Get documents POST", parameters = ?body); analytics.post_fetch_documents( &DocumentFetchKind::Normal { @@ -191,7 +191,7 @@ pub async fn get_documents( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug_span!("Get documents GET", parameters = ?params); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); @@ -235,7 +235,7 @@ fn documents_by_query( let ret = PaginationView::new(offset, limit, total as usize, documents); - debug!("returns: {:?}", ret); + debug_span!("Get documents", returns = ?ret); Ok(HttpResponse::Ok().json(ret)) } @@ -271,7 +271,7 @@ pub async fn replace_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); + debug_span!("Replace documents", parameters = ?params); let params = params.into_inner(); analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -288,6 +288,7 @@ pub async fn replace_documents( allow_index_creation, ) .await?; + debug_span!("Replace documents", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -302,8 +303,8 @@ pub async fn update_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); let params = params.into_inner(); + debug_span!("Update documents", parameters = ?params); analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -319,6 +320,7 @@ pub async fn update_documents( allow_index_creation, ) .await?; + debug_span!("Update documents", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -427,7 +429,10 @@ async fn document_addition( Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} Err(e) => { - tracing::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); + tracing::warn!( + index_uuid = %uuid, + "Unknown error happened while deleting a malformed update file: {e}" + ); } } // We still want to return the original error to the end user. 
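These statement-level spans reach the console output through the span-creation events enabled in main.rs earlier in this series (FmtSpan::NEW): the span is reported with its fields the moment it is created, even though it is dropped right away. A minimal sketch of that interaction, with illustrative names and only the tracing and tracing-subscriber crates assumed:

    use tracing_subscriber::fmt::format::FmtSpan;
    use tracing_subscriber::prelude::*;

    fn init_sketch_subscriber() {
        tracing_subscriber::registry()
            .with(
                tracing_subscriber::fmt::layer()
                    .with_line_number(true)
                    // Report each span once, when it is created, fields included.
                    .with_span_events(FmtSpan::NEW)
                    .with_filter(tracing_subscriber::filter::LevelFilter::DEBUG),
            )
            .init();
    }

    fn handler_sketch() {
        let parameters = vec!["limit=20", "offset=0"];
        // Emitted at creation under the setup above, then closed immediately.
        tracing::debug_span!("Get documents GET", parameters = ?parameters);
    }
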
@@ -453,7 +458,6 @@ async fn document_addition( } }; - debug!("returns: {:?}", task); Ok(task.into()) } @@ -464,7 +468,7 @@ pub async fn delete_documents_batch( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug_span!("Delete documents by batch", parameters = ?body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); @@ -479,7 +483,7 @@ pub async fn delete_documents_batch( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Delete documents by batch", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -497,7 +501,7 @@ pub async fn delete_documents_by_filter( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug_span!("Delete documents by filter", parameters = ?body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; @@ -515,7 +519,7 @@ pub async fn delete_documents_by_filter( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Delete documents by filter", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -532,7 +536,7 @@ pub async fn clear_all_documents( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Delete all documents", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index dbd84281f..21746177d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -7,7 +7,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use serde_json::Value; -use tracing::debug; +use tracing::debug_span; use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; @@ -56,7 +56,7 @@ pub async fn search( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); - debug!("facet search called with params: {:?}", query); + debug_span!("Facet search", parameters = ?query); let mut aggregate = FacetSearchAggregator::from_query(&query, &req); @@ -83,7 +83,7 @@ pub async fn search( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug_span!("Facet search", returns = ?search_result); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index a071ff536..69eb207b8 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -15,7 +15,7 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use serde_json::json; use time::OffsetDateTime; -use tracing::debug; +use tracing::debug_span; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; @@ -93,6 +93,7 @@ pub async fn list_indexes( index_scheduler: GuardedData, Data>, paginate: AwebQueryParameter, ) -> Result { + debug_span!("List indexes", parameters = ?paginate); let filters = 
index_scheduler.filters(); let indexes: Vec> = index_scheduler.try_for_each_index(|uid, index| -> Result, _> { @@ -105,7 +106,7 @@ pub async fn list_indexes( let indexes: Vec = indexes.into_iter().flatten().collect(); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); - debug!("returns: {:?}", ret); + debug_span!("List indexes", returns = ?ret); Ok(HttpResponse::Ok().json(ret)) } @@ -124,6 +125,7 @@ pub async fn create_index( req: HttpRequest, analytics: web::Data, ) -> Result { + debug_span!("Create index", parameters = ?body); let IndexCreateRequest { primary_key, uid } = body.into_inner(); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); @@ -137,6 +139,7 @@ pub async fn create_index( let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + debug_span!("Create index", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } else { @@ -177,7 +180,7 @@ pub async fn get_index( let index = index_scheduler.index(&index_uid)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?; - debug!("returns: {:?}", index_view); + debug_span!("Get index", returns = ?index_view); Ok(HttpResponse::Ok().json(index_view)) } @@ -189,7 +192,7 @@ pub async fn update_index( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug_span!("Update index", parameters = ?body); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( @@ -206,7 +209,7 @@ pub async fn update_index( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Update index", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -218,6 +221,7 @@ pub async fn delete_index( let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + debug_span!("Delete index", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -255,6 +259,6 @@ pub async fn get_index_stats( let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); - debug!("returns: {:?}", stats); + debug_span!("Get index stats", returns = ?stats); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 061475798..83b496d0a 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -11,7 +11,7 @@ use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; -use tracing::{debug, warn}; +use tracing::{debug_span, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; @@ -186,7 +186,7 @@ pub async fn search_with_url_query( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug_span!("Search get", parameters = ?params); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query: SearchQuery = params.into_inner().into(); @@ -213,7 +213,7 @@ pub async fn search_with_url_query( let search_result = search_result?; - debug!("returns: {:?}", search_result); + 
debug_span!("Search get", returns = ?search_result); Ok(HttpResponse::Ok().json(search_result)) } @@ -227,7 +227,7 @@ pub async fn search_with_post( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query = params.into_inner(); - debug!("search called with params: {:?}", query); + debug_span!("Search post", parameters = ?query); // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -252,7 +252,7 @@ pub async fn search_with_post( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug_span!("Search post", returns = ?search_result); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 35f58e13c..1613b8813 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -10,7 +10,7 @@ use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use serde_json::json; -use tracing::debug; +use tracing::{debug_span}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -29,7 +29,7 @@ macro_rules! make_setting_route { use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::tasks::KindWithContent; - use tracing::debug; + use tracing::debug_span; use $crate::analytics::Analytics; use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; @@ -61,7 +61,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug!("returns: {:?}", task); + debug_span!("Delete settings", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -78,6 +78,7 @@ macro_rules! make_setting_route { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); + debug_span!("Update settings", parameters = ?body); #[allow(clippy::redundant_closure_call)] $analytics(&body, &req); @@ -109,7 +110,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug!("returns: {:?}", task); + debug_span!("Update settings", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -126,7 +127,7 @@ macro_rules! 
make_setting_route { let rtxn = index.read_txn()?; let settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", settings); + debug_span!("Update settings", returns = ?settings); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -656,6 +657,7 @@ pub async fn update_all( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let new_settings = body.into_inner(); + debug_span!("Update all settings", parameters = ?new_settings); let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( @@ -768,7 +770,7 @@ pub async fn update_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Update all settings", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } @@ -781,7 +783,7 @@ pub async fn get_all( let index = index_scheduler.index(&index_uid)?; let rtxn = index.read_txn()?; let new_settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", new_settings); + debug_span!("Get all settings", returns = ?new_settings); Ok(HttpResponse::Ok().json(new_settings)) } @@ -804,7 +806,7 @@ pub async fn delete_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Delete all settings", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 19feac324..ed0ccd8db 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -10,7 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; -use tracing::debug; +use tracing::{debug_span}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -252,7 +252,7 @@ async fn get_stats( let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; - debug!("returns: {:?}", stats); + debug_span!("Get stats", returns = ?stats); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index b09841fab..2a369bed1 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -7,7 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; -use tracing::debug; +use tracing::{debug_span}; use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::extractors::authentication::policies::ActionPolicy; @@ -52,7 +52,7 @@ pub async fn multi_search_with_post( for (query_index, (index_uid, mut query)) in queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() { - debug!("multi-search #{query_index}: called with params: {:?}", query); + debug_span!("Multi-search", on_index = query_index, parameters = ?query); // Check index from API key if !index_scheduler.filters().is_index_authorized(&index_uid) { @@ -107,7 +107,7 @@ pub async fn multi_search_with_post( err })?; - debug!("returns: {:?}", search_results); + debug_span!("Multi-search", returns = ?search_results); Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) } diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 001df2150..4f329d251 100644 --- 
a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -4,7 +4,7 @@ use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; -use tracing::debug; +use tracing::debug_span; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -27,6 +27,6 @@ pub async fn create_snapshot( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", task); + debug_span!("Create snapshot", returns = ?task); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 9bb5892b5..03b63001d 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -263,7 +263,6 @@ async fn get_tasks( req: HttpRequest, analytics: web::Data, ) -> Result { - tracing::info!("You called tasks"); let mut params = params.into_inner(); analytics.get_tasks(¶ms, &req); diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 357cdf8d7..64dc0b7db 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -15,7 +15,7 @@ use std::io::BufReader; use crossbeam_channel::Sender; use rayon::prelude::*; -use tracing::debug; +use tracing::{debug_span}; use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_facet_number_docids::extract_facet_number_docids; @@ -114,7 +114,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-exists-docids"); + debug_span!("merge", database = "facet-id-exists-docids"); match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); @@ -130,7 +130,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-null-docids"); + debug_span!("merge", database = "facet-id-is-null-docids"); match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); @@ -146,7 +146,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-empty-docids"); + debug_span!("merge", database = "facet-id-is-empty-docids"); match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); @@ -272,7 +272,7 @@ fn spawn_extraction_task( Ok(chunks) => { let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "merge_multiple_chunks"); let _entered = child_span.enter(); - debug!("merge {} database", name); + debug_span!("merge", database = name); puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index ca3d6bdd1..a4981823f 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ 
-17,7 +17,7 @@ use rand::SeedableRng; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use slice_group_by::GroupBy; -use tracing::debug; +use tracing::{debug_span}; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; @@ -506,10 +506,7 @@ where documents_seen: documents_seen_count as usize, total_documents: documents_count, }); - debug!( - "We have seen {} documents on {} total document so far", - documents_seen_count, documents_count - ); + debug_span!("Seen", documents = documents_seen_count, total = documents_count); } if is_merged_database { databases_seen += 1; From 1b74010e9eb985c06c6ea369ca18b99f58831d29 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 8 Feb 2024 09:43:21 +0100 Subject: [PATCH 68/87] Remove "with_line_numbers" --- meilisearch/src/main.rs | 1 - meilisearch/src/routes/logs.rs | 1 - meilisearch/tests/logs/mod.rs | 1 - tracing-trace/src/main.rs | 13 +------------ 4 files changed, 1 insertion(+), 15 deletions(-) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index f7a8e6cff..839550667 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -34,7 +34,6 @@ fn setup(opt: &Opt) -> anyhow::Result { let subscriber = tracing_subscriber::registry().with(route_layer).with( tracing_subscriber::fmt::layer() - .with_line_number(true) .with_span_events(tracing_subscriber::fmt::format::FmtSpan::NEW) .with_filter( tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index db80d976d..39cf63d9e 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -164,7 +164,6 @@ fn make_layer< let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let fmt_layer = tracing_subscriber::fmt::layer() - .with_line_number(true) .with_writer(move || LogWriter { sender: sender.clone() }) .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index f9331166d..4091ac775 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -30,7 +30,6 @@ async fn basic_test_log_stream_route() { let subscriber = tracing_subscriber::registry().with(route_layer).with( tracing_subscriber::fmt::layer() - .with_line_number(true) .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) .with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()), ); diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs index b9202727d..c2e4f08a7 100644 --- a/tracing-trace/src/main.rs +++ b/tracing-trace/src/main.rs @@ -80,18 +80,7 @@ fn main() { // any number of other subscriber layers may be added before or // after the `ErrorLayer`... 
.with(ErrorLayer::default()) - .with(profiling_layer) - /*.with( - tracing_subscriber::fmt::layer() - .with_line_number(true) - .with_span_events(FmtSpan::FULL), /*.with_filter( - tracing_subscriber::filter::LevelFilter::from_level(tracing::Level::TRACE).and( - tracing_subscriber::filter::Targets::new() - .with_target("profile", tracing::Level::TRACE) - .not(), - ), - )*/ - )*/; + .with(profiling_layer); // set the subscriber as the default for the application tracing::subscriber::set_global_default(subscriber).unwrap(); From ef994d84d0bedb6307a39ab4dadb8a6ea14ccc25 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 8 Feb 2024 09:43:39 +0100 Subject: [PATCH 69/87] Change error messages and fix tests --- index-scheduler/src/features.rs | 2 +- meilisearch/src/error.rs | 2 +- meilisearch/src/option.rs | 2 +- meilisearch/tests/logs/error.rs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 2e311d1bd..4fd5bd0e7 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -48,7 +48,7 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "getting logs through the `/logs` route", + disabled_action: "getting logs through the `/logs/stream` route", feature: "logs route", issue_link: "https://github.com/orgs/meilisearch/discussions/721", } diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 6c5f76a72..a8351fd1f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -12,7 +12,7 @@ pub enum MeilisearchHttpError { #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", "))] MissingContentType(Vec), - #[error("Log route is currently used by someone else.")] + #[error("The `/logs/stream` route is currently in use by someone else.")] AlreadyUsedLogRoute, #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] CsvDelimiterWithWrongContentType(String), diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 3a9b634b1..9586a3f6f 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -312,7 +312,7 @@ pub struct Opt { /// Experimental logs route feature. For more information, see: /// - /// Enables the log route on the `POST /logs` endpoint and the `DELETE /logs` to stop receiving logs. + /// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs. #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] #[serde(default)] pub experimental_enable_logs_route: bool, diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index 7cbc39b1f..078302632 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -162,7 +162,7 @@ async fn logs_stream_without_enabling_the_route() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "getting logs through the `/logs` route requires enabling the `logs route` experimental feature. See https://github.com/meilisearch/product/discussions/625", + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/721", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -173,7 +173,7 @@ async fn logs_stream_without_enabling_the_route() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "getting logs through the `/logs` route requires enabling the `logs route` experimental feature. See https://github.com/meilisearch/product/discussions/625", + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" From 15023823169954fbefdb3f73d0b06a1e6f0aa770 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 10:14:50 +0100 Subject: [PATCH 70/87] use debug instead of debug_span --- meilisearch/src/routes/dump.rs | 4 +-- meilisearch/src/routes/features.rs | 8 ++--- meilisearch/src/routes/indexes/documents.rs | 32 +++++++++---------- .../src/routes/indexes/facet_search.rs | 6 ++-- meilisearch/src/routes/indexes/mod.rs | 20 ++++++------ meilisearch/src/routes/indexes/search.rs | 10 +++--- meilisearch/src/routes/indexes/settings.rs | 20 ++++++------ meilisearch/src/routes/mod.rs | 4 +-- meilisearch/src/routes/multi_search.rs | 6 ++-- meilisearch/src/routes/snapshot.rs | 4 +-- .../src/update/index_documents/extract/mod.rs | 19 +++++------ 11 files changed, 67 insertions(+), 66 deletions(-) diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index b604985fc..071ae60b8 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -5,7 +5,7 @@ use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; -use tracing::debug_span; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -32,6 +32,6 @@ pub async fn create_dump( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Create dump", returns = ?task); + debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index cc29b8995..227b485c5 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -7,7 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde_json::json; -use tracing::{debug_span}; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::ActionPolicy; @@ -34,7 +34,7 @@ async fn get_features( analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); let features = features.runtime_features(); - debug_span!("Get features", returns = ?features); + debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) } @@ -61,7 +61,7 @@ async fn patch_features( analytics: Data, ) -> Result { let features = index_scheduler.features(); - debug_span!("Patch features", parameters = ?new_features); + debug!(parameters = ?new_features, "Patch features"); let old_features = features.runtime_features(); let new_features = meilisearch_types::features::RuntimeTogglableFeatures { @@ -95,6 +95,6 @@ 
async fn patch_features( Some(&req), ); index_scheduler.put_runtime_features(new_features)?; - debug_span!("Patch features", returns = ?new_features); + debug!(returns = ?new_features, "Patch features"); Ok(HttpResponse::Ok().json(new_features)) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 19d617c27..1f41fa10c 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -27,7 +27,7 @@ use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; -use tracing::{debug_span}; +use tracing::debug; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; @@ -101,7 +101,7 @@ pub async fn get_document( analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); - debug_span!("Get document", parameters = ?params); + debug!(parameters = ?params, "Get document"); let index_uid = IndexUid::try_from(index_uid)?; analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); @@ -111,7 +111,7 @@ pub async fn get_document( let index = index_scheduler.index(&index_uid)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; - debug_span!("Get document", returns = ?document); + debug!(returns = ?document, "Get document"); Ok(HttpResponse::Ok().json(document)) } @@ -132,7 +132,7 @@ pub async fn delete_document( }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete document", returns = ?task); + debug!(returns = ?task, "Delete document"); Ok(HttpResponse::Accepted().json(task)) } @@ -170,7 +170,7 @@ pub async fn documents_by_query_post( analytics: web::Data, ) -> Result { let body = body.into_inner(); - debug_span!("Get documents POST", parameters = ?body); + debug!(parameters = ?body, "Get documents POST"); analytics.post_fetch_documents( &DocumentFetchKind::Normal { @@ -191,7 +191,7 @@ pub async fn get_documents( req: HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Get documents GET", parameters = ?params); + debug!(parameters = ?params, "Get documents GET"); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); @@ -235,7 +235,7 @@ fn documents_by_query( let ret = PaginationView::new(offset, limit, total as usize, documents); - debug_span!("Get documents", returns = ?ret); + debug!(returns = ?ret, "Get documents"); Ok(HttpResponse::Ok().json(ret)) } @@ -271,7 +271,7 @@ pub async fn replace_documents( ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug_span!("Replace documents", parameters = ?params); + debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -288,7 +288,7 @@ pub async fn replace_documents( allow_index_creation, ) .await?; - debug_span!("Replace documents", returns = ?task); + debug!(returns = ?task, "Replace documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -304,7 +304,7 @@ pub async fn update_documents( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let params = params.into_inner(); - debug_span!("Update documents", parameters = ?params); + debug!(parameters = ?params, "Update documents"); analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); @@ -320,7 +320,7 
@@ pub async fn update_documents( allow_index_creation, ) .await?; - debug_span!("Update documents", returns = ?task); + debug!(returns = ?task, "Update documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -468,7 +468,7 @@ pub async fn delete_documents_batch( req: HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Delete documents by batch", parameters = ?body); + debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); @@ -483,7 +483,7 @@ pub async fn delete_documents_batch( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete documents by batch", returns = ?task); + debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) } @@ -501,7 +501,7 @@ pub async fn delete_documents_by_filter( req: HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Delete documents by filter", parameters = ?body); + debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; @@ -519,7 +519,7 @@ pub async fn delete_documents_by_filter( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete documents by filter", returns = ?task); + debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) } @@ -536,7 +536,7 @@ pub async fn clear_all_documents( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete all documents", returns = ?task); + debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 21746177d..a980fb278 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -7,7 +7,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use serde_json::Value; -use tracing::debug_span; +use tracing::debug; use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; @@ -56,7 +56,7 @@ pub async fn search( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); - debug_span!("Facet search", parameters = ?query); + debug!(parameters = ?query, "Facet search"); let mut aggregate = FacetSearchAggregator::from_query(&query, &req); @@ -83,7 +83,7 @@ pub async fn search( let search_result = search_result?; - debug_span!("Facet search", returns = ?search_result); + debug!(returns = ?search_result, "Facet search"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 69eb207b8..d80bd9c61 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -15,7 +15,7 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use serde_json::json; use time::OffsetDateTime; -use tracing::debug_span; +use tracing::debug; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; @@ -93,7 
+93,7 @@ pub async fn list_indexes( index_scheduler: GuardedData, Data>, paginate: AwebQueryParameter, ) -> Result { - debug_span!("List indexes", parameters = ?paginate); + debug!(parameters = ?paginate, "List indexes"); let filters = index_scheduler.filters(); let indexes: Vec> = index_scheduler.try_for_each_index(|uid, index| -> Result, _> { @@ -106,7 +106,7 @@ pub async fn list_indexes( let indexes: Vec = indexes.into_iter().flatten().collect(); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); - debug_span!("List indexes", returns = ?ret); + debug!(returns = ?ret, "List indexes"); Ok(HttpResponse::Ok().json(ret)) } @@ -125,7 +125,7 @@ pub async fn create_index( req: HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Create index", parameters = ?body); + debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); @@ -139,7 +139,7 @@ pub async fn create_index( let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Create index", returns = ?task); + debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) } else { @@ -180,7 +180,7 @@ pub async fn get_index( let index = index_scheduler.index(&index_uid)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?; - debug_span!("Get index", returns = ?index_view); + debug!(returns = ?index_view, "Get index"); Ok(HttpResponse::Ok().json(index_view)) } @@ -192,7 +192,7 @@ pub async fn update_index( req: HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Update index", parameters = ?body); + debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( @@ -209,7 +209,7 @@ pub async fn update_index( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Update index", returns = ?task); + debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) } @@ -221,7 +221,7 @@ pub async fn delete_index( let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete index", returns = ?task); + debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) } @@ -259,6 +259,6 @@ pub async fn get_index_stats( let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); - debug_span!("Get index stats", returns = ?stats); + debug!(returns = ?stats, "Get index stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 83b496d0a..3adfce970 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -11,7 +11,7 @@ use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; -use tracing::{debug_span, warn}; +use tracing::{debug, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; @@ -186,7 +186,7 @@ pub async fn search_with_url_query( req: 
HttpRequest, analytics: web::Data, ) -> Result { - debug_span!("Search get", parameters = ?params); + debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query: SearchQuery = params.into_inner().into(); @@ -213,7 +213,7 @@ pub async fn search_with_url_query( let search_result = search_result?; - debug_span!("Search get", returns = ?search_result); + debug!(returns = ?search_result, "Search get"); Ok(HttpResponse::Ok().json(search_result)) } @@ -227,7 +227,7 @@ pub async fn search_with_post( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query = params.into_inner(); - debug_span!("Search post", parameters = ?query); + debug!(parameters = ?query, "Search post"); // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -252,7 +252,7 @@ pub async fn search_with_post( let search_result = search_result?; - debug_span!("Search post", returns = ?search_result); + debug!(returns = ?search_result, "Search post"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 1613b8813..23e8925c7 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -10,7 +10,7 @@ use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use serde_json::json; -use tracing::{debug_span}; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -29,7 +29,7 @@ macro_rules! make_setting_route { use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::tasks::KindWithContent; - use tracing::debug_span; + use tracing::debug; use $crate::analytics::Analytics; use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; @@ -61,7 +61,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug_span!("Delete settings", returns = ?task); + debug!(returns = ?task, "Delete settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -78,7 +78,7 @@ macro_rules! make_setting_route { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); - debug_span!("Update settings", parameters = ?body); + debug!(parameters = ?body, "Update settings"); #[allow(clippy::redundant_closure_call)] $analytics(&body, &req); @@ -110,7 +110,7 @@ macro_rules! make_setting_route { .await?? .into(); - debug_span!("Update settings", returns = ?task); + debug!(returns = ?task, "Update settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -127,7 +127,7 @@ macro_rules! 
make_setting_route { let rtxn = index.read_txn()?; let settings = settings(&index, &rtxn)?; - debug_span!("Update settings", returns = ?settings); + debug!(returns = ?settings, "Update settings"); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -657,7 +657,7 @@ pub async fn update_all( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let new_settings = body.into_inner(); - debug_span!("Update all settings", parameters = ?new_settings); + debug!(parameters = ?new_settings, "Update all settings"); let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( @@ -770,7 +770,7 @@ pub async fn update_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Update all settings", returns = ?task); + debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -783,7 +783,7 @@ pub async fn get_all( let index = index_scheduler.index(&index_uid)?; let rtxn = index.read_txn()?; let new_settings = settings(&index, &rtxn)?; - debug_span!("Get all settings", returns = ?new_settings); + debug!(returns = ?new_settings, "Get all settings"); Ok(HttpResponse::Ok().json(new_settings)) } @@ -806,7 +806,7 @@ pub async fn delete_all( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Delete all settings", returns = ?task); + debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index ed0ccd8db..89cf63c50 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -10,7 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; -use tracing::{debug_span}; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -252,7 +252,7 @@ async fn get_stats( let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; - debug_span!("Get stats", returns = ?stats); + debug!(returns = ?stats, "Get stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 2a369bed1..86aa58e70 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -7,7 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; -use tracing::{debug_span}; +use tracing::debug; use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::extractors::authentication::policies::ActionPolicy; @@ -52,7 +52,7 @@ pub async fn multi_search_with_post( for (query_index, (index_uid, mut query)) in queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() { - debug_span!("Multi-search", on_index = query_index, parameters = ?query); + debug!(on_index = query_index, parameters = ?query, "Multi-search"); // Check index from API key if !index_scheduler.filters().is_index_authorized(&index_uid) { @@ -107,7 +107,7 @@ pub async fn multi_search_with_post( err })?; - debug_span!("Multi-search", returns = ?search_results); + debug!(returns = ?search_results, "Multi-search"); Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) } diff --git 
a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 4f329d251..c94529932 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -4,7 +4,7 @@ use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; -use tracing::debug_span; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; @@ -27,6 +27,6 @@ pub async fn create_snapshot( let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug_span!("Create snapshot", returns = ?task); + debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 64dc0b7db..aac60e6e6 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -15,7 +15,7 @@ use std::io::BufReader; use crossbeam_channel::Sender; use rayon::prelude::*; -use tracing::{debug_span}; +use tracing::debug; use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_facet_number_docids::extract_facet_number_docids; @@ -114,7 +114,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug_span!("merge", database = "facet-id-exists-docids"); + debug!(database = "facet-id-exists-docids", "merge"); match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); @@ -130,7 +130,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug_span!("merge", database = "facet-id-is-null-docids"); + debug!(database = "facet-id-is-null-docids", "merge"); match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); @@ -146,7 +146,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug_span!("merge", database = "facet-id-is-empty-docids"); + debug!(database = "facet-id-is-empty-docids", "merge"); match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); @@ -231,7 +231,7 @@ pub(crate) fn data_from_obkv_documents( extract_facet_number_docids, merge_deladd_cbo_roaring_bitmaps, TypedChunk::FieldIdFacetNumberDocids, - "field-id-facet-number-docids", + "field-id-facet-number-docidsdexing::details, ", ); Ok(()) @@ -261,18 +261,19 @@ fn spawn_extraction_task( let current_span = tracing::Span::current(); rayon::spawn(move || { - let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_multiple_chunks"); + let child_span = + tracing::trace_span!(target: "", parent: ¤t_span, "extract_multiple_chunks"); let _entered = child_span.enter(); - puffin::profile_scope!("extract_multiple_chunks", name); + puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name); let chunks: Result = chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect(); let current_span = tracing::Span::current(); rayon::spawn(move || match chunks { 
Ok(chunks) => { - let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "merge_multiple_chunks"); + let child_span = tracing::trace_span!(target: "", parent: ¤t_span, "merge_multiple_chunks"); let _entered = child_span.enter(); - debug_span!("merge", database = name); + debug!(database = name, "merge"); puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); From cfb3e6b51f37a437fe63f4b28216022c2f8dc540 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 13:49:13 +0100 Subject: [PATCH 71/87] update the actix-web trace --- meilisearch/src/lib.rs | 42 +++++++++++++++++++++++++++++++++++++++-- meilisearch/src/main.rs | 2 +- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 711a11a0a..c43a32cdc 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -38,7 +38,7 @@ use meilisearch_types::versioning::{check_version_file, create_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; -use tracing::error; +use tracing::{error, info_span}; use tracing_subscriber::filter::Targets; use crate::error::MeilisearchHttpError; @@ -136,11 +136,49 @@ pub fn create_app( .allow_any_method() .max_age(86_400), // 24h ) - .wrap(tracing_actix_web::TracingLogger::default()) + .wrap(tracing_actix_web::TracingLogger::::new()) .wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) } +struct AwebTracingLogger; + +impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger { + fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span { + use tracing::field::Empty; + + let conn_info = request.connection_info(); + let headers = request.headers(); + let user_agent = headers + .get(http::header::USER_AGENT) + .map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned()) + .unwrap_or_default(); + info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty) + } + + fn on_request_end( + span: tracing::Span, + outcome: &Result, actix_web::Error>, + ) { + match &outcome { + Ok(response) => { + let code: i32 = response.response().status().as_u16().into(); + span.record("status_code", code); + + if let Some(error) = response.response().error() { + // use the status code already constructed for the outgoing HTTP response + span.record("error", &tracing::field::display(error.as_response_error())); + } + } + Err(error) => { + let code: i32 = error.error_response().status().as_u16().into(); + span.record("status_code", code); + span.record("error", &tracing::field::display(error.as_response_error())); + } + }; + } +} + enum OnFailure { RemoveDb, KeepDb, diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 839550667..ed18fb97e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -34,7 +34,7 @@ fn setup(opt: &Opt) -> anyhow::Result { let subscriber = tracing_subscriber::registry().with(route_layer).with( tracing_subscriber::fmt::layer() - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::NEW) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) .with_filter( tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) 
.unwrap(), From 35aa9d5904014cd3904de26fccf4f171b991c6f6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 13:49:27 +0100 Subject: [PATCH 72/87] fix an error message --- meilisearch/src/routes/logs.rs | 2 +- meilisearch/tests/logs/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 39cf63d9e..5d1c16833 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -50,7 +50,7 @@ enum MyParseError { #[error(transparent)] ParseError(#[from] tracing_subscriber::filter::ParseError), #[error( - "Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`" + "Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`" )] Example, } diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index 078302632..384f134a7 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -36,7 +36,7 @@ async fn logs_stream_bad_target() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `info:meilisearch`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`", + "message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" From 2c88131bb117d56ba89afa171481a3edb8cf4cc1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 13:59:30 +0100 Subject: [PATCH 73/87] rename the fmt mode to human --- meilisearch/src/routes/logs.rs | 4 ++-- meilisearch/tests/logs/error.rs | 10 +++++----- meilisearch/tests/logs/mod.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 5d1c16833..5b3b82838 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -36,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { #[deserr(rename_all = lowercase)] pub enum LogMode { #[default] - Fmt, + Human, Profile, } @@ -160,7 +160,7 @@ fn make_layer< ) -> (Box + Send + Sync>, PinnedByteStream) { let guard = HandleGuard { logs: logs.into_inner() }; match opt.mode { - LogMode::Fmt => { + LogMode::Human => { let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); let fmt_layer = tracing_subscriber::fmt::layer() diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs index 384f134a7..4f4d741e3 100644 --- a/meilisearch/tests/logs/error.rs +++ b/meilisearch/tests/logs/error.rs @@ -89,7 +89,7 @@ async fn logs_stream_bad_mode() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Unknown value `tamo` at `.mode`: expected one of `fmt`, `profile`", + "message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" @@ -133,7 +133,7 @@ async fn 
logs_stream_bad_profile_memory() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.", + "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.", "code": "invalid_settings_typo_tolerance", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" @@ -146,10 +146,10 @@ async fn logs_stream_bad_profile_memory() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Fmt mode.", - "code": "invalid_settings_typo_tolerance", + "message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`", + "code": "bad_request", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" + "link": "https://docs.meilisearch.com/errors#bad_request" } "###); } diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 4091ac775..0002fe33c 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -57,7 +57,7 @@ async fn basic_test_log_stream_route() { .insert_header(ContentType::json()) .set_payload( serde_json::to_vec(&json!({ - "mode": "fmt", + "mode": "human", "target": "info", })) .unwrap(), From bf43a3f60a4d39205d7b37a4e484975c090a6df5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 14:55:36 +0100 Subject: [PATCH 74/87] fix typo --- milli/src/update/index_documents/extract/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index aac60e6e6..44f54ff26 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -231,7 +231,7 @@ pub(crate) fn data_from_obkv_documents( extract_facet_number_docids, merge_deladd_cbo_roaring_bitmaps, TypedChunk::FieldIdFacetNumberDocids, - "field-id-facet-number-docidsdexing::details, ", + "field-id-facet-number-docids", ); Ok(()) From 285aa15d2fa22222c0076a95452db3edcb72fd51 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 8 Feb 2024 14:56:28 +0100 Subject: [PATCH 75/87] make the mode camelCase instead of lowercase --- meilisearch/src/routes/logs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs index 5b3b82838..d95f80bb8 100644 --- a/meilisearch/src/routes/logs.rs +++ b/meilisearch/src/routes/logs.rs @@ -33,7 +33,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) { } #[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)] -#[deserr(rename_all = lowercase)] +#[deserr(rename_all = camelCase)] pub enum LogMode { #[default] Human, From 407ad753edcebb6ccf5d9b931c1bedb93a81d3e4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 8 Feb 2024 15:11:42 +0100 Subject: [PATCH 76/87] rust fmt --- milli/src/update/index_documents/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index a4981823f..36aa94964 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -17,7 +17,7 @@ use rand::SeedableRng; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use 
slice_group_by::GroupBy; -use tracing::{debug_span}; +use tracing::debug_span; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; From c02d585f5b99729172f3fb7c4847bc20b126c7c5 Mon Sep 17 00:00:00 2001 From: Eric Long Date: Mon, 12 Feb 2024 14:10:40 +0800 Subject: [PATCH 77/87] Upgrade rustls to 0.21.10 and ring to 0.17 --- Cargo.lock | 182 +++++++++++++---------------------- meilisearch-types/Cargo.toml | 2 +- meilisearch/Cargo.toml | 12 +-- meilisearch/src/main.rs | 2 +- meilisearch/src/option.rs | 4 +- 5 files changed, 78 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32ad13772..0fa1d5131 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,16 +36,16 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.5.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f" +checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" dependencies = [ "actix-codec", "actix-rt", "actix-service", "actix-tls", "actix-utils", - "ahash 0.8.3", + "ahash 0.8.8", "base64 0.21.7", "bitflags 2.4.1", "brotli", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "actix-tls" -version = "3.1.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3" +checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f" dependencies = [ "actix-rt", "actix-service", @@ -148,13 +148,11 @@ dependencies = [ "futures-core", "impl-more", "pin-project-lite", - "rustls 0.21.6", - "rustls-webpki", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", "tokio-util", "tracing", - "webpki-roots 0.22.6", + "webpki-roots", ] [[package]] @@ -169,9 +167,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.4.1" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b" +checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" dependencies = [ "actix-codec", "actix-http", @@ -183,7 +181,7 @@ dependencies = [ "actix-tls", "actix-utils", "actix-web-codegen", - "ahash 0.8.3", + "ahash 0.8.8", "bytes", "bytestring", "cfg-if", @@ -270,14 +268,15 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" dependencies = [ "cfg-if", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -834,9 +833,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", "libc", @@ -2126,8 +2125,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -2251,7 +2252,7 @@ dependencies = [ "atomic-polyfill", "hash32", 
"rustc_version", - "spin 0.9.8", + "spin", "stable_deref_trait", ] @@ -2420,9 +2421,9 @@ dependencies = [ "futures-util", "http 0.2.11", "hyper", - "rustls 0.21.6", + "rustls", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", ] [[package]] @@ -3124,13 +3125,14 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.3.0" +version = "9.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" +checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" dependencies = [ "base64 0.21.7", + "js-sys", "pem", - "ring 0.16.20", + "ring", "serde", "serde_json", "simple_asn1", @@ -3721,7 +3723,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls 0.20.9", + "rustls", "rustls-pemfile", "segment", "serde", @@ -4257,11 +4259,12 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "pem" -version = "1.1.1" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" +checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" dependencies = [ - "base64 0.13.1", + "base64 0.21.7", + "serde", ] [[package]] @@ -4792,20 +4795,20 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.6", + "rustls", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "system-configuration", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 0.25.3", + "webpki-roots", "winreg", ] @@ -4823,30 +4826,15 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" [[package]] name = "ring" -version = "0.16.20" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - -[[package]] -name = "ring" -version = "0.17.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.48.0", ] @@ -4924,24 +4912,12 @@ dependencies = [ [[package]] name = "rustls" -version = "0.20.9" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.16.20", - "sct", - "webpki", -] - -[[package]] -name = "rustls" -version = "0.21.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" -dependencies = [ - "log", - "ring 0.16.20", + "ring", "rustls-webpki", "sct", ] @@ -4961,8 +4937,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.3", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -5004,12 +4980,12 @@ checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -5275,12 +5251,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -5642,24 +5612,13 @@ dependencies = [ "syn 2.0.48", ] -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.9", - "tokio", - "webpki", -] - [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.6", + "rustls", "tokio", ] @@ -5915,12 +5874,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -5937,13 +5890,13 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls 0.21.6", + "rustls", "rustls-webpki", "serde", "serde_json", "socks", "url", - "webpki-roots 0.25.3", + "webpki-roots", ] [[package]] @@ -6153,25 +6106,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f" -dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", -] - -[[package]] -name = "webpki-roots" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" -dependencies = [ - "webpki", -] - [[package]] name = "webpki-roots" version = "0.25.3" @@ -6533,6 +6467,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "zerofrom" version = "0.1.3" diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index f5bfaa036..b9edb4c1e 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true license.workspace = true [dependencies] -actix-web = { version = "4.4.1", default-features = false } +actix-web = { version = "4.5.1", 
default-features = false } anyhow = "1.0.79" convert_case = "0.6.0" csv = "1.3.0" diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 7fbabba87..f8a50238a 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -14,18 +14,18 @@ default-run = "meilisearch" [dependencies] actix-cors = "0.7.0" -actix-http = { version = "3.5.1", default-features = false, features = [ +actix-http = { version = "3.6.0", default-features = false, features = [ "compress-brotli", "compress-gzip", - "rustls", + "rustls-0_21", ] } actix-utils = "3.0.1" -actix-web = { version = "4.4.1", default-features = false, features = [ +actix-web = { version = "4.5.1", default-features = false, features = [ "macros", "compress-brotli", "compress-gzip", "cookies", - "rustls", + "rustls-0_21", ] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } anyhow = { version = "1.0.79", features = ["backtrace"] } @@ -52,7 +52,7 @@ index-scheduler = { path = "../index-scheduler" } indexmap = { version = "2.1.0", features = ["serde"] } is-terminal = "0.4.10" itertools = "0.11.0" -jsonwebtoken = "8.3.0" +jsonwebtoken = "9.2.0" lazy_static = "1.4.0" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } @@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [ "rustls-tls", "json", ], default-features = false } -rustls = "0.20.8" +rustls = "0.21.6" rustls-pemfile = "1.0.2" segment = { version = "0.2.3", optional = true } serde = { version = "1.0.195", features = ["derive"] } diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index ed18fb97e..1e067b43e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -133,7 +133,7 @@ async fn run_http( .keep_alive(KeepAlive::Os); if let Some(config) = opt_clone.get_ssl_config()? 
{ - http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?; + http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?; } else { http_server.bind(&opt_clone.http_addr)?.run().await?; } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 9586a3f6f..96bc29006 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -503,11 +503,11 @@ impl Opt { } if self.ssl_require_auth { let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(verifier) + config.with_client_cert_verifier(Arc::from(verifier)) } else { let verifier = AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(verifier) + config.with_client_cert_verifier(Arc::from(verifier)) } } None => config.with_no_client_auth(), From 290f6d15e725644c171504774e8f981e376a31a8 Mon Sep 17 00:00:00 2001 From: irevoire Date: Mon, 12 Feb 2024 10:15:00 +0000 Subject: [PATCH 78/87] Update version for the next release (v1.7.0) in Cargo.toml --- Cargo.lock | 32 ++++++++++++++++---------------- Cargo.toml | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 32ad13772..c16dfd860 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "bytes", @@ -1537,7 +1537,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "big_s", @@ -1781,7 +1781,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.6.1" +version = "1.7.0" dependencies = [ "faux", "tempfile", @@ -1803,7 +1803,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.6.1" +version = "1.7.0" dependencies = [ "insta", "nom", @@ -1834,7 +1834,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.6.1" +version = "1.7.0" dependencies = [ "criterion", "serde_json", @@ -1952,7 +1952,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.6.1" +version = "1.7.0" dependencies = [ "arbitrary", "clap", @@ -2930,7 +2930,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "big_s", @@ -3116,7 +3116,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.6.1" +version = "1.7.0" dependencies = [ "criterion", "serde_json", @@ -3655,7 +3655,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.6.1" +version = "1.7.0" dependencies = [ "insta", "md5", @@ -3664,7 +3664,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.6.1" +version = "1.7.0" dependencies = [ "actix-cors", "actix-http", @@ -3757,7 +3757,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.6.1" +version = "1.7.0" dependencies = [ "base64 0.21.7", "enum-iterator", @@ -3776,7 +3776,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.6.1" +version = "1.7.0" dependencies = [ "actix-web", "anyhow", @@ -3806,7 +3806,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "clap", @@ -3845,7 +3845,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.6.1" +version = "1.7.0" dependencies = [ "arroy", "big_s", @@ -4272,7 
+4272,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.6.1" +version = "1.7.0" dependencies = [ "big_s", "serde_json", @@ -6478,7 +6478,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.6.1" +version = "1.7.0" dependencies = [ "cargo_metadata", "clap", diff --git a/Cargo.toml b/Cargo.toml index 7f6a8088e..11190025a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ members = [ ] [workspace.package] -version = "1.6.1" +version = "1.7.0" authors = [ "Quentin de Quelen ", "Clément Renault ", From 024de0dcf834d034fa4a8372a76139a77fd035b6 Mon Sep 17 00:00:00 2001 From: curquiza Date: Wed, 14 Feb 2024 17:36:39 +0100 Subject: [PATCH 79/87] Create automation when creating Milestone to create update-version issue --- .github/workflows/milestone-workflow.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 2b8b7bf62..2ede3dc21 100644 --- a/.github/workflows/milestone-workflow.yml +++ b/.github/workflows/milestone-workflow.yml @@ -110,6 +110,25 @@ jobs: --milestone $MILESTONE_VERSION \ --assignee curquiza + create-update-version-issue: +    needs: get-release-version +    # Create the update-version issue when the milestone is created +    if: github.event.action == 'created' +    runs-on: ubuntu-latest +    env: +      ISSUE_TEMPLATE: issue-template.md +    steps: +      - uses: actions/checkout@v3 +      - name: Download the issue template +        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE +      - name: Create the issue +        run: | +          gh issue create \ +            --title "Update version in Cargo.toml for $MILESTONE_VERSION" \ +            --label 'maintenance' \ +            --body-file $ISSUE_TEMPLATE \ +            --milestone $MILESTONE_VERSION + # ---------------- # MILESTONE CLOSED # ---------------- From 9ee4f55e6c72116446438dbc03e6cf20c12f9081 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 7 Sep 2023 11:16:51 +0200 Subject: [PATCH 80/87] let you specify your task id --- index-scheduler/src/error.rs | 4 + index-scheduler/src/lib.rs | 815 ++++++++++++-------- meilisearch/src/lib.rs | 2 +- meilisearch/src/routes/dump.rs | 5 +- meilisearch/src/routes/indexes/documents.rs | 25 +- meilisearch/src/routes/indexes/mod.rs | 12 +- meilisearch/src/routes/indexes/settings.rs | 18 +- meilisearch/src/routes/mod.rs | 30 +- meilisearch/src/routes/snapshot.rs | 5 +- meilisearch/src/routes/swap_indexes.rs | 5 +- meilisearch/src/routes/tasks.rs | 9 +- meilisearch/tests/index/create_index.rs | 71 ++ 12 files changed, 655 insertions(+), 346 deletions(-) diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index bbe526460..223b84762 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -48,6 +48,8 @@ impl From for Code { pub enum Error { #[error("{1}")] WithCustomErrorCode(Code, Box), + #[error("Received bad task id: {received} should be >= to {expected}.")] + BadTaskId { received: TaskId, expected: TaskId }, #[error("Index `{0}` not found.")] IndexNotFound(String), #[error("Index `{0}` already exists.")] @@ -161,6 +163,7 @@ impl Error { match self { Error::IndexNotFound(_) | Error::WithCustomErrorCode(_, _) + | Error::BadTaskId { .. 
} | Error::IndexAlreadyExists(_) | Error::SwapDuplicateIndexFound(_) | Error::SwapDuplicateIndexesFound(_) @@ -205,6 +208,7 @@ impl ErrorCode for Error { fn error_code(&self) -> Code { match self { Error::WithCustomErrorCode(code, _) => *code, + Error::BadTaskId { .. } => Code::BadRequest, Error::IndexNotFound(_) => Code::IndexNotFound, Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 7514a2a68..b1edaabe5 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -993,7 +993,7 @@ impl IndexScheduler { /// Register a new task in the scheduler. /// /// If it fails and data was associated with the task, it tries to delete the associated data. - pub fn register(&self, kind: KindWithContent) -> Result { + pub fn register(&self, kind: KindWithContent, task_id: Option) -> Result { let mut wtxn = self.env.write_txn()?; // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task @@ -1003,8 +1003,16 @@ impl IndexScheduler { return Err(Error::NoSpaceLeftInTaskQueue); } + let next_task_id = self.next_task_id(&wtxn)?; + + if let Some(uid) = task_id { + if uid < next_task_id { + return Err(Error::BadTaskId { received: uid, expected: next_task_id }); + } + } + let mut task = Task { - uid: self.next_task_id(&wtxn)?, + uid: task_id.unwrap_or(next_task_id), enqueued_at: OffsetDateTime::now_utc(), started_at: None, finished_at: None, @@ -1386,13 +1394,16 @@ impl IndexScheduler { // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); - self.register(KindWithContent::TaskDeletion { - query: format!( - "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", - delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, - ), - tasks: to_delete, - })?; + self.register( + KindWithContent::TaskDeletion { + query: format!( + "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", + delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, + ), + tasks: to_delete, + }, + None, + )?; Ok(()) } @@ -2016,7 +2027,7 @@ mod tests { for (idx, kind) in kinds.into_iter().enumerate() { let k = kind.as_kind(); - let task = index_scheduler.register(kind).unwrap(); + let task = index_scheduler.register(kind, None).unwrap(); index_scheduler.assert_internally_consistent(); assert_eq!(task.uid, idx as u32); @@ -2031,18 +2042,18 @@ mod tests { fn insert_task_while_another_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); // while the task is processing can we register another task? 
- index_scheduler.register(index_creation_task("index_b", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_b", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }) + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); } @@ -2051,7 +2062,7 @@ mod tests { fn test_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); handle.advance_till([Start, BatchCreated]); @@ -2065,17 +2076,23 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2094,22 +2111,25 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); @@ -2142,7 +2162,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2151,10 +2171,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); index_scheduler - .register(KindWithContent::TaskDeletion { 
- query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }, + None, + ) .unwrap(); // again, no progress made at all, but one more task is registered snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); @@ -2188,7 +2211,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2199,10 +2222,13 @@ mod tests { // Now we delete the first task index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); @@ -2225,7 +2251,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2237,10 +2263,13 @@ mod tests { // Now we delete the first task multiple times in a row for _ in 0..2 { index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2263,14 +2292,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); @@ -2292,7 +2324,10 @@ mod tests { }"#; index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2300,19 +2335,22 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, 
+ ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2336,21 +2374,27 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2373,10 +2417,13 @@ mod tests { fn document_deletion_and_document_addition() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2390,14 +2437,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2428,17 +2478,20 @@ mod tests { for name in index_names { index_scheduler - .register(KindWithContent::IndexCreation { - index_uid: name.to_string(), - primary_key: None, - }) + .register( + KindWithContent::IndexCreation { + index_uid: name.to_string(), + primary_key: None, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } for name in index_names { index_scheduler - .register(KindWithContent::DocumentClear { index_uid: name.to_string() }) + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2463,7 +2516,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2477,18 +2530,24 @@ mod tests { 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, + ], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); @@ -2498,7 +2557,7 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }).unwrap(); + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None).unwrap(); handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); } @@ -2515,7 +2574,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_n_successful_batches(4); @@ -2525,12 +2584,15 @@ mod tests { snapshot!(first_snap, name: "initial_tasks_processed"); let err = index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }, + None, + ) .unwrap_err(); snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. 
`a`, `b` were specified several times."); @@ -2539,13 +2601,16 @@ mod tests { // Index `e` does not exist, but we don't check its existence yet index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, - IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); // Now the first swap should have an error message saying `e` and `f` do not exist @@ -2566,17 +2631,20 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler)); @@ -2601,7 +2669,7 @@ mod tests { }, ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2618,7 +2686,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2626,10 +2694,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); @@ -2644,7 +2715,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2652,10 +2723,13 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); @@ -2685,7 +2759,7 @@ mod tests { 
replace_document_import_task("wolfo", None, 2, documents_count2), ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_one_successful_batch(); @@ -2693,10 +2767,13 @@ mod tests { handle.advance_till([Start, BatchCreated, InsideProcessBatch]); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1, 2]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); @@ -2724,14 +2801,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2771,14 +2851,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2820,14 +2903,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2870,14 +2956,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2921,14 +3010,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); 
file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2973,13 +3065,13 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); handle.advance_n_successful_batches(3); @@ -3037,11 +3129,11 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3260,17 +3352,17 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3346,20 +3438,20 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, 
None).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); handle.advance_n_successful_batches(1); let kind = KindWithContent::TaskCancelation { query: "test_query".to_string(), tasks: [0, 1, 2, 3].into_iter().collect(), }; - let task_cancelation = index_scheduler.register(kind).unwrap(); + let task_cancelation = index_scheduler.register(kind, None).unwrap(); handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3394,7 +3486,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); handle.advance_one_failed_batch(); @@ -3419,14 +3511,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); @@ -3457,14 +3552,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3513,14 +3611,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3561,14 +3662,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: 
S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3596,7 +3700,10 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3615,14 +3722,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3655,7 +3765,10 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3674,14 +3787,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3718,7 +3834,10 @@ mod tests { // Create the index. 
index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3738,14 +3857,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3791,14 +3913,17 @@ mod tests { let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3843,14 +3968,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3904,14 +4032,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3961,14 +4092,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4042,14 +4176,17 @@ mod tests { file.persist().unwrap(); index_scheduler - 
.register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4125,14 +4262,17 @@ mod tests { file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4186,7 +4326,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); @@ -4206,15 +4346,18 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); // on average this task takes ~600 bytes loop { - let result = index_scheduler.register(KindWithContent::IndexCreation { - index_uid: S("doggo"), - primary_key: None, - }); + let result = index_scheduler.register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ); if result.is_err() { break; } @@ -4224,7 +4367,10 @@ mod tests { // at this point the task DB shoud have reached its limit and we should not be able to register new tasks let result = index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4232,10 +4378,10 @@ mod tests { // Even the task deletion that doesn't delete anything shouldn't be accepted let result = index_scheduler - .register(KindWithContent::TaskDeletion { - query: S("test"), - tasks: RoaringBitmap::new(), - }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, + None, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. 
Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4243,13 +4389,19 @@ mod tests { // But a task deletion that delete something should works index_scheduler - .register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); // Now we should be able to enqueue a few tasks again index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); } @@ -4262,22 +4414,34 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_successful_batch(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); handle.advance_one_failed_batch(); // at this point the max number of tasks is reached // we can still enqueue multiple tasks index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) .unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap(); @@ -4325,11 +4489,11 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None).unwrap(); snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" { @@ -4479,11 +4643,11 @@ mod tests { query: "cancel dump".to_owned(), tasks: RoaringBitmap::from_iter([0]), }; - let _ = index_scheduler.register(dump_creation).unwrap(); + let _ = index_scheduler.register(dump_creation, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - let _ = index_scheduler.register(dump_cancellation).unwrap(); + let _ = index_scheduler.register(dump_cancellation, None).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); @@ -4491,4 +4655,21 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); } + + #[test] + fn basic_set_taskid() { + 
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None).unwrap(); + snapshot!(task.uid, @"0"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12)).unwrap(); + snapshot!(task.uid, @"12"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let error = index_scheduler.register(kind, Some(5)).unwrap_err(); + snapshot!(error, @"Received bad task id: 5 should be >= to 13."); + } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index c43a32cdc..328b9e9b2 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -251,7 +251,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< .name(String::from("register-snapshot-tasks")) .spawn(move || loop { thread::sleep(snapshot_delay); - if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) { + if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) { error!("Error while registering snapshot: {}", e); } }) diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 071ae60b8..8f44070d8 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -11,7 +11,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); @@ -29,8 +29,9 @@ pub async fn create_dump( keys: auth_controller.list_keys()?, instance_uid: analytics.instance_uid().cloned(), }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 1f41fa10c..492f039cf 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -7,7 +7,7 @@ use bstr::ByteSlice as _; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; -use index_scheduler::IndexScheduler; +use index_scheduler::{IndexScheduler, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -36,7 +36,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::search::parse_filter; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { @@ -130,9 +130,10 @@ pub async fn delete_document( index_uid: index_uid.to_string(), 
documents_ids: vec![document_id], }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!(returns = ?task, "Delete document"); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -277,6 +278,7 @@ pub async fn replace_documents( analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -285,6 +287,7 @@ pub async fn replace_documents( params.csv_delimiter, body, IndexDocumentsMethod::ReplaceDocuments, + uid, allow_index_creation, ) .await?; @@ -309,6 +312,7 @@ pub async fn update_documents( analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -317,6 +321,7 @@ pub async fn update_documents( params.csv_delimiter, body, IndexDocumentsMethod::UpdateDocuments, + uid, allow_index_creation, ) .await?; @@ -334,6 +339,7 @@ async fn document_addition( csv_delimiter: Option, mut body: Payload, method: IndexDocumentsMethod, + task_id: Option, allow_index_creation: bool, ) -> Result { let format = match ( @@ -450,7 +456,7 @@ async fn document_addition( }; let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? { + let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? 
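// `document_addition` forwards the caller-supplied `task_id` straight to `register`;
// the match below is what cleans up after a failed registration by deleting the
// update file that was already written for the payload.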
{ Ok(task) => task, Err(e) => { index_scheduler.delete_update_file(uuid)?; @@ -480,8 +486,9 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) @@ -516,8 +523,9 @@ pub async fn delete_documents_by_filter( .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) @@ -533,8 +541,9 @@ pub async fn clear_all_documents( analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index d80bd9c61..6451d930d 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -17,7 +17,7 @@ use serde_json::json; use time::OffsetDateTime; use tracing::debug; -use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -137,8 +137,9 @@ pub async fn create_index( ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) @@ -206,8 +207,9 @@ pub async fn update_index( primary_key: body.primary_key, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) @@ -216,11 +218,13 @@ pub async fn update_index( pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || 
index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 23e8925c7..9fbd84161 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -15,7 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; #[macro_export] macro_rules! make_setting_route { @@ -34,7 +34,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::routes::SummarizedTaskView; + use $crate::routes::{get_task_id, SummarizedTaskView}; pub async fn delete( index_scheduler: GuardedData< @@ -42,6 +42,7 @@ macro_rules! make_setting_route { Data, >, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -56,8 +57,9 @@ macro_rules! make_setting_route { is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? .into(); @@ -105,8 +107,9 @@ macro_rules! make_setting_route { is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? 
.into(); @@ -767,8 +770,9 @@ pub async fn update_all( is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) @@ -790,6 +794,7 @@ pub async fn get_all( pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -803,8 +808,9 @@ pub async fn delete_all( is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 89cf63c50..61a9f3352 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -4,7 +4,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_types::error::ResponseError; +use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; @@ -45,6 +45,34 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/experimental-features").configure(features::configure)); } +pub fn get_task_id(req: &HttpRequest) -> Result, ResponseError> { + let task_id = req + .headers() + .get("TaskId") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("TaskId is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? 
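// At this point `task_id` holds the raw `TaskId` header value, if any; the next
// step parses it into a numeric task uid, so a header such as `TaskId: 42` yields
// `Ok(Some(42))` while a non-numeric value is rejected with a 400 Bad Request.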
+ .map(|s| { + s.parse::().map_err(|e| { + ResponseError::from_msg( + format!( + "Could not parse the TaskId as a {}: {e}", + std::any::type_name::(), + ), + Code::BadRequest, + ) + }) + }) + .transpose()?; + Ok(task_id) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index c94529932..28dbac85f 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -10,7 +10,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, SummarizedTaskView}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); @@ -24,8 +24,9 @@ pub async fn create_snapshot( analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); let task = KindWithContent::SnapshotCreation; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 79e619705..64268dbfa 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde_json::json; -use super::SummarizedTaskView; +use super::{get_task_id, SummarizedTaskView}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -60,7 +60,8 @@ pub async fn swap_indexes( } let task = KindWithContent::IndexSwap { swaps }; + let uid = get_task_id(&req)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 03b63001d..26e1c43f8 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -18,7 +18,7 @@ use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; -use super::SummarizedTaskView; +use super::{get_task_id, SummarizedTaskView}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; @@ -197,7 +197,9 @@ async fn cancel_tasks( let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??; + let uid = get_task_id(&req)?; + let task = + task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) @@ -242,7 +244,8 @@ async fn delete_tasks( let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; - let task = 
task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??; + let uid = get_task_id(&req)?; + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index 7ce56d440..b9f755f35 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -199,3 +199,74 @@ async fn error_create_with_invalid_index_uid() { } "###); } + +#[actix_rt::test] +async fn send_task_id() { + let server = Server::new().await; + let app = server.init_web_app().await; + let index = server.index("catto"); + let (response, code) = index.create(None).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "catto", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "doggo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "25")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"202 Accepted"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 25, + "indexUid": "doggo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "girafo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "12")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"400 Bad Request"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response), @r###" + { + "message": "Received bad task id: 12 should be >= to 26.", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} From 01ae46dd801a2fbe43351660acc65e3467747006 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 20 Feb 2024 11:24:44 +0100 Subject: [PATCH 81/87] add an experimental cli parameter to allow specifying your task id --- .../src/analytics/segment_analytics.rs | 3 +++ .../src/extractors/sequential_extractor.rs | 1 + meilisearch/src/lib.rs | 1 + meilisearch/src/option.rs | 17 +++++++++++++++++ meilisearch/src/routes/dump.rs | 4 +++- meilisearch/src/routes/indexes/documents.rs | 19 +++++++++++++------ meilisearch/src/routes/indexes/mod.rs | 10 +++++++--- meilisearch/src/routes/indexes/settings.rs | 14 ++++++++++---- meilisearch/src/routes/mod.rs | 6 +++++- meilisearch/src/routes/snapshot.rs | 4 +++- meilisearch/src/routes/swap_indexes.rs | 4 +++- meilisearch/src/routes/tasks.rs | 7 +++++-- meilisearch/tests/index/create_index.rs | 9 +++++++-- 13 files changed, 78 insertions(+), 21 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index a38ddaab2..a78b0d11b 100644 --- 
a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -250,6 +250,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_ha_parameters: bool, experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, @@ -288,6 +289,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + experimental_ha_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, @@ -335,6 +337,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_ha_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), diff --git a/meilisearch/src/extractors/sequential_extractor.rs b/meilisearch/src/extractors/sequential_extractor.rs index c04210616..23d6cb997 100644 --- a/meilisearch/src/extractors/sequential_extractor.rs +++ b/meilisearch/src/extractors/sequential_extractor.rs @@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C } gen_seq! { SeqFromRequestFut4; A B C D } gen_seq! { SeqFromRequestFut5; A B C D E } gen_seq! { SeqFromRequestFut6; A B C D E F } +gen_seq! { SeqFromRequestFut7; A B C D E F G } pin_project! { #[project = ExtractProj] diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 328b9e9b2..2d9dec485 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -453,6 +453,7 @@ pub fn configure_data( .app_data(auth) .app_data(web::Data::from(analytics)) .app_data(web::Data::new(logs)) + .app_data(web::Data::new(opt.clone())) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 9586a3f6f..4dd17d546 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,6 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +const MEILI_EXPERIMENTAL_HA_PARAMETERS: &str = "MEILI_EXPERIMENTAL_HA_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = @@ -317,6 +318,17 @@ pub struct Opt { #[serde(default)] pub experimental_enable_logs_route: bool, + /// Enable multiple features that helps you to run meilisearch in a high availability context. 
+ /// TODO: TAMO: Update the discussion link + /// For more information, see: + /// + /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now + /// - Lets you specify a custom task ID upon registering a task + /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) + #[clap(long, env = MEILI_EXPERIMENTAL_HA_PARAMETERS)] + #[serde(default)] + pub experimental_ha_parameters: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] @@ -423,6 +435,7 @@ impl Opt { no_analytics, experimental_enable_metrics, experimental_enable_logs_route, + experimental_ha_parameters, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -479,6 +492,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_HA_PARAMETERS, + experimental_ha_parameters.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, experimental_enable_logs_route.to_string(), diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 8f44070d8..56231a759 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -12,6 +12,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); @@ -21,6 +22,7 @@ pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); @@ -29,7 +31,7 @@ pub async fn create_dump( keys: auth_controller.list_keys()?, instance_uid: analytics.instance_uid().cloned(), }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 492f039cf..5bf7eaa8d 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -38,6 +38,7 @@ use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::search::parse_filter; +use crate::Opt; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -119,6 +120,7 @@ pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); @@ -130,7 +132,7 @@ pub async fn delete_document( index_uid: index_uid.to_string(), documents_ids: vec![document_id], }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move 
|| index_scheduler.register(task, uid)).await??.into(); debug!("returns: {:?}", task); @@ -268,6 +270,7 @@ pub async fn replace_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -278,7 +281,7 @@ pub async fn replace_documents( analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -302,6 +305,7 @@ pub async fn update_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -312,7 +316,7 @@ pub async fn update_documents( analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -472,6 +476,7 @@ pub async fn delete_documents_batch( index_uid: web::Path, body: web::Json>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); @@ -486,7 +491,7 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -506,6 +511,7 @@ pub async fn delete_documents_by_filter( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); @@ -523,7 +529,7 @@ pub async fn delete_documents_by_filter( .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -535,13 +541,14 @@ pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 6451d930d..59a1f0e64 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -22,6 +22,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; pub mod 
documents; pub mod facet_search; @@ -123,6 +124,7 @@ pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Create index"); @@ -137,7 +139,7 @@ pub async fn create_index( ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Create index"); @@ -191,6 +193,7 @@ pub async fn update_index( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Update index"); @@ -207,7 +210,7 @@ pub async fn update_index( primary_key: body.primary_key, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -219,10 +222,11 @@ pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); debug!(returns = ?task, "Delete index"); diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 9fbd84161..6e43bce41 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -16,6 +16,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; #[macro_export] macro_rules! make_setting_route { @@ -34,6 +35,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::Opt; use $crate::routes::{get_task_id, SummarizedTaskView}; pub async fn delete( @@ -43,6 +45,7 @@ macro_rules! make_setting_route { >, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -57,7 +60,7 @@ macro_rules! make_setting_route { is_deletion: true, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? @@ -75,6 +78,7 @@ macro_rules! make_setting_route { index_uid: actix_web::web::Path, body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, + opt: web::Data, $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -107,7 +111,7 @@ macro_rules! make_setting_route { is_deletion: false, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) .await?? 
@@ -655,6 +659,7 @@ pub async fn update_all( index_uid: web::Path, body: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -770,7 +775,7 @@ pub async fn update_all( is_deletion: false, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); @@ -795,6 +800,7 @@ pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -808,7 +814,7 @@ pub async fn delete_all( is_deletion: true, allow_index_creation, }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 61a9f3352..2dc89b150 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -15,6 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; +use crate::Opt; const PAGINATION_DEFAULT_LIMIT: usize = 20; @@ -45,7 +46,10 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/experimental-features").configure(features::configure)); } -pub fn get_task_id(req: &HttpRequest) -> Result, ResponseError> { +pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, ResponseError> { + if !opt.experimental_ha_parameters { + return Ok(None); + } let task_id = req .headers() .get("TaskId") diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 28dbac85f..6b3178126 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -11,6 +11,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); @@ -19,12 +20,13 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); let task = KindWithContent::SnapshotCreation; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 64268dbfa..f8adeeb18 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -16,6 +16,7 @@ use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); @@ -32,6 +33,7 @@ pub 
async fn swap_indexes( index_scheduler: GuardedData, Data>, params: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -60,7 +62,7 @@ pub async fn swap_indexes( } let task = KindWithContent::IndexSwap { swaps }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task: SummarizedTaskView = tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 26e1c43f8..279b57e3d 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -23,6 +23,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; const DEFAULT_LIMIT: u32 = 20; @@ -161,6 +162,7 @@ async fn cancel_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -197,7 +199,7 @@ async fn cancel_tasks( let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; let task: SummarizedTaskView = task.into(); @@ -209,6 +211,7 @@ async fn delete_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -244,7 +247,7 @@ async fn delete_tasks( let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; - let uid = get_task_id(&req)?; + let uid = get_task_id(&req, &opt)?; let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; let task: SummarizedTaskView = task.into(); diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index b9f755f35..7a678624c 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -2,9 +2,10 @@ use actix_web::http::header::ContentType; use actix_web::test; use http::header::ACCEPT_ENCODING; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use crate::common::encoder::Encoder; -use crate::common::{Server, Value}; +use crate::common::{default_settings, Server, Value}; use crate::json; #[actix_rt::test] @@ -202,7 +203,11 @@ async fn error_create_with_invalid_index_uid() { #[actix_rt::test] async fn send_task_id() { - let server = Server::new().await; + let temp = tempfile::tempdir().unwrap(); + + let options = Opt { experimental_ha_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + let app = server.init_web_app().await; let index = server.index("catto"); let (response, code) = index.create(None).await; From 6ba999491693fe6ca94376a421817707fc8e66c3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 20 Feb 2024 12:16:50 +0100 Subject: [PATCH 82/87] disable the auto deletion of tasks when the ha mode is enabled --- index-scheduler/src/insta_snapshot.rs | 1 + index-scheduler/src/lib.rs | 68 +++++++++++++- .../task_deletion_have_not_been_enqueued.snap | 90 +++++++++++++++++++ 
.../task_queue_is_full.snap | 90 +++++++++++++++++++ meilisearch/src/lib.rs | 1 + 5 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 42f041578..988e75b81 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let IndexScheduler { autobatching_enabled, + cleanup_enabled: _, must_stop_processing: _, processing_tasks, file_store, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index b1edaabe5..9a1799469 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -264,6 +264,9 @@ pub struct IndexSchedulerOptions { /// Set to `true` iff the index scheduler is allowed to automatically /// batch tasks together, to process multiple tasks at once. pub autobatching_enabled: bool, + /// Set to `true` iff the index scheduler is allowed to automatically + /// delete the finished tasks when there are too many tasks. + pub cleanup_enabled: bool, /// The maximum number of tasks stored in the task queue before starting /// to auto schedule task deletions. pub max_number_of_tasks: usize, @@ -324,6 +327,9 @@ pub struct IndexScheduler { /// Whether auto-batching is enabled or not. pub(crate) autobatching_enabled: bool, + /// Whether we should automatically cleanup the task queue or not. + pub(crate) cleanup_enabled: bool, + /// The max number of tasks allowed before the scheduler starts to delete /// the finished tasks automatically. 
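// `cleanup_enabled` is the switch the HA mode turns off: when it is `false`, the
// tick loop skips `cleanup_task_queue()`, so crossing `max_number_of_tasks` no
// longer enqueues an automatic task deletion and pruning finished tasks becomes
// the operator's responsibility.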
pub(crate) max_number_of_tasks: usize, @@ -390,6 +396,7 @@ impl IndexScheduler { index_mapper: self.index_mapper.clone(), wake_up: self.wake_up.clone(), autobatching_enabled: self.autobatching_enabled, + cleanup_enabled: self.cleanup_enabled, max_number_of_tasks: self.max_number_of_tasks, max_number_of_batched_tasks: self.max_number_of_batched_tasks, puffin_frame: self.puffin_frame.clone(), @@ -491,6 +498,7 @@ impl IndexScheduler { wake_up: Arc::new(SignalEvent::auto(true)), puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, + cleanup_enabled: options.cleanup_enabled, max_number_of_tasks: options.max_number_of_tasks, max_number_of_batched_tasks: options.max_number_of_batched_tasks, dumps_path: options.dumps_path, @@ -1134,7 +1142,9 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } - self.cleanup_task_queue()?; + if self.cleanup_enabled { + self.cleanup_task_queue()?; + } let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let batch = @@ -1781,6 +1791,7 @@ mod tests { index_count: 5, indexer_config, autobatching_enabled: true, + cleanup_enabled: true, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: usize::MAX, instance_features: Default::default(), @@ -4484,6 +4495,61 @@ mod tests { drop(rtxn); } + #[test] + fn test_disable_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = + IndexScheduler::test_with_custom_config(vec![], |config| { + config.cleanup_enabled = false; + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + + // now we're above the max number of tasks + // and if we try to advance in the tick function no new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + drop(rtxn); + } + #[test] fn basic_get_stats() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap 
b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": 
"enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 2d9dec485..500d56079 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -286,6 +286,7 @@ fn open_or_create_database_unchecked( enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, + cleanup_enabled: !opt.experimental_ha_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, From 05ae29198970d9265b26f6f7232e8611708ca2d6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 11:21:26 +0100 Subject: [PATCH 83/87] implement the dry run ha parameter --- file-store/src/lib.rs | 22 +- index-scheduler/src/lib.rs | 249 ++++++++++++++++---- meilisearch/src/lib.rs | 4 +- meilisearch/src/routes/dump.rs | 7 +- meilisearch/src/routes/indexes/documents.rs | 35 ++- meilisearch/src/routes/indexes/mod.rs | 16 +- meilisearch/src/routes/indexes/settings.rs | 20 +- meilisearch/src/routes/mod.rs | 19 ++ meilisearch/src/routes/snapshot.rs | 7 +- meilisearch/src/routes/swap_indexes.rs | 7 +- meilisearch/src/routes/tasks.rs | 10 +- 11 files changed, 317 insertions(+), 79 deletions(-) diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index 75db9bb5f..e3851a2df 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -56,7 +56,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { dry: false, file, path }; Ok((uuid, update_file)) } @@ -67,7 +67,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::from_u128(uuid); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { dry: false, file, path }; Ok((uuid, update_file)) } @@ -135,13 +135,29 @@ impl FileStore { } pub struct File { + dry: bool, path: PathBuf, file: NamedTempFile, } impl File { + pub fn dry_file() -> Result { + #[cfg(target_family = "unix")] + let path = PathBuf::from_str("/dev/null").unwrap(); + #[cfg(target_family = "windows")] + let path = PathBuf::from_str("\\Device\\Null").unwrap(); + + Ok(Self { + dry: true, + path: path.clone(), + file: tempfile::Builder::new().make(|_| std::fs::File::create(path.clone()))?, + }) + } + pub fn persist(self) -> Result<()> { - self.file.persist(&self.path)?; + if !self.dry { + self.file.persist(&self.path)?; + } Ok(()) } } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 9a1799469..5d0ce9eb9 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1001,7 +1001,12 @@ impl IndexScheduler { /// Register a new task in the scheduler. /// /// If it fails and data was associated with the task, it tries to delete the associated data. - pub fn register(&self, kind: KindWithContent, task_id: Option) -> Result { + pub fn register( + &self, + kind: KindWithContent, + task_id: Option, + dry_run: bool, + ) -> Result { let mut wtxn = self.env.write_txn()?; // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task @@ -1037,6 +1042,11 @@ impl IndexScheduler { // (that it does not contain duplicate indexes). 
check_index_swap_validity(&task)?; + // At this point the task is going to be registered and no further checks will be done + if dry_run { + return Ok(task); + } + // Get rid of the mutability. let task = task; @@ -1101,8 +1111,12 @@ impl IndexScheduler { /// The returned file and uuid can be used to associate /// some data to a task. The file will be kept until /// the task has been fully processed. - pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> { - Ok(self.file_store.new_update()?) + pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { + if dry_run { + Ok((Uuid::nil(), file_store::File::dry_file()?)) + } else { + Ok(self.file_store.new_update()?) + } } #[cfg(test)] @@ -1413,6 +1427,7 @@ impl IndexScheduler { tasks: to_delete, }, None, + false, )?; Ok(()) @@ -1534,7 +1549,7 @@ impl<'a> Dump<'a> { ) -> Result { let content_uuid = match content_file { Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.index_scheduler.create_update_file()?; + let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); for doc in content_file { builder.append_json_object(&doc?)?; @@ -2038,7 +2053,7 @@ mod tests { for (idx, kind) in kinds.into_iter().enumerate() { let k = kind.as_kind(); - let task = index_scheduler.register(kind, None).unwrap(); + let task = index_scheduler.register(kind, None, false).unwrap(); index_scheduler.assert_internally_consistent(); assert_eq!(task.uid, idx as u32); @@ -2053,18 +2068,18 @@ mod tests { fn insert_task_while_another_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); // while the task is processing can we register another task? 
- index_scheduler.register(index_creation_task("index_b", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); } @@ -2073,7 +2088,7 @@ mod tests { fn test_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id"), None).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); handle.advance_till([Start, BatchCreated]); @@ -2090,6 +2105,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2098,12 +2114,13 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2125,22 +2142,23 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); @@ -2173,7 +2191,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2188,6 +2206,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0, 1]), }, None, + false, ) .unwrap(); // again, no progress made at all, but one more task is registered @@ -2222,7 +2241,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2239,6 
+2258,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); @@ -2262,7 +2282,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2280,6 +2300,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2313,6 +2334,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); @@ -2338,6 +2360,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2356,12 +2379,13 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2395,6 +2419,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2405,6 +2430,7 @@ mod tests { documents_ids: vec![S("1"), S("2")], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2434,6 +2460,7 @@ mod tests { documents_ids: vec![S("1"), S("2")], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2458,6 +2485,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2495,6 +2523,7 @@ mod tests { primary_key: None, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2502,7 +2531,11 @@ mod tests { for name in index_names { index_scheduler - .register(KindWithContent::DocumentClear { index_uid: name.to_string() }, None) + .register( + KindWithContent::DocumentClear { index_uid: name.to_string() }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2527,7 +2560,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2549,6 +2582,7 @@ mod tests { ], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); @@ -2558,6 +2592,7 @@ mod tests { swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); @@ -2568,7 +2603,9 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - 
index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }, None).unwrap(); + index_scheduler + .register(KindWithContent::IndexSwap { swaps: vec![] }, None, false) + .unwrap(); handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); } @@ -2585,7 +2622,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_n_successful_batches(4); @@ -2603,6 +2640,7 @@ mod tests { ], }, None, + false, ) .unwrap_err(); snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); @@ -2621,6 +2659,7 @@ mod tests { ], }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -2652,10 +2691,11 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler)); @@ -2680,7 +2720,7 @@ mod tests { }, ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2697,7 +2737,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2711,6 +2751,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); @@ -2726,7 +2767,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0), None) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2740,6 +2781,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0]), }, None, + false, ) .unwrap(); @@ -2770,7 +2812,7 @@ mod tests { replace_document_import_task("wolfo", None, 2, documents_count2), ]; for task in to_enqueue { - let _ = index_scheduler.register(task, None).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_one_successful_batch(); @@ -2784,6 +2826,7 @@ mod tests { tasks: RoaringBitmap::from_iter([0, 1, 2]), }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); @@ -2822,6 +2865,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2872,6 +2916,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2924,6 +2969,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -2977,6 +3023,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3031,6 +3078,7 @@ mod tests { 
allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3076,13 +3124,13 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); handle.advance_n_successful_batches(3); @@ -3140,11 +3188,11 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3363,17 +3411,17 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3449,20 +3497,20 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind, None).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind, None).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); 
handle.advance_n_successful_batches(1); let kind = KindWithContent::TaskCancelation { query: "test_query".to_string(), tasks: [0, 1, 2, 3].into_iter().collect(), }; - let task_cancelation = index_scheduler.register(kind, None).unwrap(); + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3497,7 +3545,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); handle.advance_one_failed_batch(); @@ -3532,6 +3580,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3573,6 +3622,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3632,6 +3682,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3683,6 +3734,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3714,6 +3766,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3743,6 +3796,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3779,6 +3833,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3808,6 +3863,7 @@ mod tests { allow_index_creation: false, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3848,6 +3904,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, None, + false, ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3878,6 +3935,7 @@ mod tests { allow_index_creation, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3934,6 +3992,7 @@ mod tests { allow_index_creation, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -3989,6 +4048,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4053,6 +4113,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4113,6 +4174,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4197,6 +4259,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4283,6 +4346,7 @@ mod tests { allow_index_creation: true, }, None, + false, ) .unwrap(); index_scheduler.assert_internally_consistent(); @@ -4337,7 +4401,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = 
index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); @@ -4360,6 +4424,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4368,6 +4433,7 @@ mod tests { let result = index_scheduler.register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ); if result.is_err() { break; @@ -4381,6 +4447,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); @@ -4392,6 +4459,7 @@ mod tests { .register( KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, None, + false, ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); @@ -4403,6 +4471,7 @@ mod tests { .register( KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4412,6 +4481,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4428,6 +4498,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4436,6 +4507,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4446,12 +4518,14 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); index_scheduler .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); @@ -4507,6 +4581,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_successful_batch(); @@ -4515,6 +4590,7 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); handle.advance_one_failed_batch(); @@ -4525,12 +4601,14 @@ mod tests { .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); index_scheduler .register( KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, None, + false, ) .unwrap(); @@ -4555,11 +4633,11 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind, None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind, 
None).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" { @@ -4709,11 +4787,11 @@ mod tests { query: "cancel dump".to_owned(), tasks: RoaringBitmap::from_iter([0]), }; - let _ = index_scheduler.register(dump_creation, None).unwrap(); + let _ = index_scheduler.register(dump_creation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - let _ = index_scheduler.register(dump_cancellation, None).unwrap(); + let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); @@ -4727,15 +4805,86 @@ mod tests { let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, None).unwrap(); + let task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(task.uid, @"0"); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let task = index_scheduler.register(kind, Some(12)).unwrap(); + let task = index_scheduler.register(kind, Some(12), false).unwrap(); snapshot!(task.uid, @"12"); let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; - let error = index_scheduler.register(kind, Some(5)).unwrap_err(); + let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); snapshot!(error, @"Received bad task id: 5 should be >= to 13."); } + + #[test] + fn dry_run() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, true).unwrap(); + snapshot!(task.uid, @"0"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), true).unwrap(); + snapshot!(task.uid, @"12"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All 
Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 500d56079..de26b771e 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -251,7 +251,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< .name(String::from("register-snapshot-tasks")) .spawn(move || loop { thread::sleep(snapshot_delay); - if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) { + if let Err(e) = + index_scheduler.register(KindWithContent::SnapshotCreation, None, false) + { error!("Error while registering snapshot: {}", e); } }) diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs index 56231a759..7f3cd06a5 100644 --- a/meilisearch/src/routes/dump.rs +++ b/meilisearch/src/routes/dump.rs @@ -11,7 +11,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -32,8 +32,11 @@ pub async fn create_dump( instance_uid: analytics.instance_uid().cloned(), }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 5bf7eaa8d..a74bbff49 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -36,7 +36,9 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use crate::routes::{ + get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, +}; use crate::search::parse_filter; use crate::Opt; @@ -133,8 +135,11 @@ pub async fn delete_document( documents_ids: vec![document_id], }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -282,6 +287,7 @@ pub async fn replace_documents( let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -291,6 +297,7 @@ pub async fn replace_documents( body, IndexDocumentsMethod::ReplaceDocuments, uid, + dry_run, allow_index_creation, ) .await?; @@ -317,6 +324,7 @@ pub async fn update_documents( let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -326,6 +334,7 @@ pub async fn update_documents( body, IndexDocumentsMethod::UpdateDocuments, uid, + dry_run, allow_index_creation, ) .await?; @@ -344,6 +353,7 @@ async fn document_addition( mut body: Payload, method: IndexDocumentsMethod, task_id: Option, + dry_run: bool, allow_index_creation: bool, ) -> Result { let format = match ( @@ -376,7 +386,7 @@ async fn document_addition( } }; - let (uuid, mut update_file) = index_scheduler.create_update_file()?; + let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; let temp_file = match tempfile() { Ok(file) => file, @@ -460,7 +470,9 @@ async fn document_addition( }; let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? { + let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run)) + .await? + { Ok(task) => task, Err(e) => { index_scheduler.delete_update_file(uuid)?; @@ -492,8 +504,11 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) @@ -530,8 +545,11 @@ pub async fn delete_documents_by_filter( let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) @@ -549,8 +567,11 @@ pub async fn clear_all_documents( let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 59a1f0e64..59fa02dff 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -22,6 +22,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::is_dry_run; use crate::Opt; pub mod documents; @@ -140,8 +141,11 @@ pub async fn create_index( let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) @@ -211,8 +215,11 @@ pub async fn update_index( }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) @@ -227,8 +234,11 @@ pub async fn delete_index( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 6e43bce41..c71d83279 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -15,7 +15,7 @@ use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; #[macro_export] @@ -36,7 +36,7 @@ macro_rules! make_setting_route { use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; use $crate::Opt; - use $crate::routes::{get_task_id, SummarizedTaskView}; + use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; pub async fn delete( index_scheduler: GuardedData< @@ -61,8 +61,9 @@ macro_rules! make_setting_route { allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); @@ -112,8 +113,9 @@ macro_rules! make_setting_route { allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); @@ -776,8 +778,11 @@ pub async fn update_all( allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) @@ -815,8 +820,11 @@ pub async fn delete_all( allow_index_creation, }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 2dc89b150..f98d4b4de 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -77,6 +77,25 @@ pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, Respo Ok(task_id) } +pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { + if !opt.experimental_ha_parameters { + return Ok(false); + } + Ok(req + .headers() + .get("DryRun") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("DryRun is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? 
+ .map_or(false, |s| s.to_lowercase() == "true")) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 6b3178126..84673729f 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -10,7 +10,7 @@ use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{get_task_id, SummarizedTaskView}; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -27,8 +27,11 @@ pub async fn create_snapshot( let task = KindWithContent::SnapshotCreation; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index f8adeeb18..51a7b0707 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde_json::json; -use super::{get_task_id, SummarizedTaskView}; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; @@ -63,7 +63,10 @@ pub async fn swap_indexes( let task = KindWithContent::IndexSwap { swaps }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 279b57e3d..f35d97fe6 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -18,7 +18,7 @@ use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; -use super::{get_task_id, SummarizedTaskView}; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; @@ -200,8 +200,10 @@ async fn cancel_tasks( KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = - task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??; + task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) @@ -248,7 +250,9 @@ async fn delete_tasks( KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; let uid = get_task_id(&req, &opt)?; - let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??; + let dry_run = is_dry_run(&req, &opt)?; + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) From e1a3eed1eb90a660535b67fa11bf8843e309198a Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 12:30:28 +0100 Subject: [PATCH 84/87] update the discussion link --- meilisearch/src/option.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 4dd17d546..f932abac6 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -319,8 +319,7 @@ pub struct Opt { pub experimental_enable_logs_route: bool, /// Enable multiple features that helps you to run meilisearch in a high availability context. 
- /// TODO: TAMO: Update the discussion link - /// For more information, see: + /// For more information, see: /// /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now /// - Lets you specify a custom task ID upon registering a task From 693ba8dd15280fe4be1f06fbc27465ff6d7fa551 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 21 Feb 2024 14:33:40 +0100 Subject: [PATCH 85/87] rename the cli parameter --- meilisearch/src/analytics/segment_analytics.rs | 6 +++--- meilisearch/src/lib.rs | 2 +- meilisearch/src/option.rs | 14 +++++++------- meilisearch/src/routes/mod.rs | 4 ++-- meilisearch/tests/index/create_index.rs | 3 ++- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index a78b0d11b..8bb7e8d81 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -250,7 +250,7 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, - experimental_ha_parameters: bool, + experimental_replication_parameters: bool, experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, @@ -289,7 +289,7 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, - experimental_ha_parameters, + experimental_replication_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, @@ -337,7 +337,7 @@ impl From for Infos { Self { env, experimental_enable_metrics, - experimental_ha_parameters, + experimental_replication_parameters, experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index de26b771e..1ab161564 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -288,7 +288,7 @@ fn open_or_create_database_unchecked( enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, - cleanup_enabled: !opt.experimental_ha_parameters, + cleanup_enabled: !opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index f932abac6..e6ff4f2a1 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,7 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; -const MEILI_EXPERIMENTAL_HA_PARAMETERS: &str = "MEILI_EXPERIMENTAL_HA_PARAMETERS"; +const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = @@ -318,15 +318,15 @@ pub struct Opt { #[serde(default)] pub experimental_enable_logs_route: bool, - /// Enable multiple features that helps you to run meilisearch in a high 
availability context.
+    /// Enable multiple features that help you to run meilisearch in a replicated context.
     /// For more information, see:
     ///
     /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now
     /// - Lets you specify a custom task ID upon registering a task
     /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed)
-    #[clap(long, env = MEILI_EXPERIMENTAL_HA_PARAMETERS)]
+    #[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)]
     #[serde(default)]
-    pub experimental_ha_parameters: bool,
+    pub experimental_replication_parameters: bool,
     /// Experimental RAM reduction during indexing, do not use in production, see:
     #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
@@ -434,7 +434,7 @@ impl Opt {
             no_analytics,
             experimental_enable_metrics,
             experimental_enable_logs_route,
-            experimental_ha_parameters,
+            experimental_replication_parameters,
             experimental_reduce_indexing_memory_usage,
         } = self;
         export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@@ -492,8 +492,8 @@ impl Opt {
             experimental_enable_metrics.to_string(),
         );
         export_to_env_if_not_present(
-            MEILI_EXPERIMENTAL_HA_PARAMETERS,
-            experimental_ha_parameters.to_string(),
+            MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS,
+            experimental_replication_parameters.to_string(),
         );
         export_to_env_if_not_present(
             MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index f98d4b4de..249103e12 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -47,7 +47,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 }
 pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
-    if !opt.experimental_ha_parameters {
+    if !opt.experimental_replication_parameters {
         return Ok(None);
     }
     let task_id = req
@@ -78,7 +78,7 @@ pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
 }
 pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
-    if !opt.experimental_ha_parameters {
+    if !opt.experimental_replication_parameters {
         return Ok(false);
     }
     Ok(req
diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs
index 7a678624c..b309b83c6 100644
--- a/meilisearch/tests/index/create_index.rs
+++ b/meilisearch/tests/index/create_index.rs
@@ -205,7 +205,8 @@ async fn error_create_with_invalid_index_uid() {
 async fn send_task_id() {
     let temp = tempfile::tempdir().unwrap();
-    let options = Opt { experimental_ha_parameters: true, ..default_settings(temp.path()) };
+    let options =
+        Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
     let server = Server::new_with_options(options).await.unwrap();
     let app = server.init_web_app().await;

From c2e2003a808f3526ccdb52d8d5033c8e9fc310aa Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 22 Feb 2024 15:51:47 +0100
Subject: [PATCH 86/87] create a test with the dry-run parameter enabled

---
 meilisearch/tests/common/index.rs            |  9 +---
 meilisearch/tests/documents/add_documents.rs | 49 +++++++++++++++++++-
 meilisearch/tests/documents/errors.rs        | 41 ++++++++++------
 3 files changed, 76 insertions(+), 23 deletions(-)

diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs
index 4992eeb13..16fc10e98 100644
--- a/meilisearch/tests/common/index.rs
+++ b/meilisearch/tests/common/index.rs
@@ -100,16 +100,11 @@ impl Index<'_> {
     pub async fn raw_add_documents(
         &self,
         payload: &str,
-        content_type:
Option<&str>, + headers: Vec<(&str, &str)>, query_parameter: &str, ) -> (Value, StatusCode) { let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter); - - if let Some(content_type) = content_type { - self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await - } else { - self.service.post_str(url, payload, Vec::new()).await - } + self.service.post_str(url, payload, headers).await } pub async fn update_documents( diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 9733f7741..e6af85229 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1,10 +1,11 @@ use actix_web::test; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; use crate::common::encoder::Encoder; -use crate::common::{GetAllDocumentsOptions, Server, Value}; +use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value}; use crate::json; /// This is the basic usage of our API and every other tests uses the content-type application/json @@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() { assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 120); } + +#[actix_rt::test] +async fn dry_register_file() { + let temp = tempfile::tempdir().unwrap(); + + let options = + Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + let index = server.index("tamo"); + + let documents = r#" + { + "id": "12", + "doggo": "kefir" + } + "#; + + let (response, code) = index + .raw_add_documents( + documents, + vec![("Content-Type", "application/json"), ("DryRun", "true")], + "", + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + snapshot!(code, @"202 Accepted"); + + let (response, code) = index.get_task(response.uid()).await; + snapshot!(response, @r###" + { + "message": "Task `0` not found.", + "code": "task_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#task_not_found" + } + "###); + snapshot!(code, @"404 Not Found"); +} diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index bd06aabce..cd2d89813 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("application/json"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("text/csv"), 
"").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", None, "").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() { "###); // even with a csv delimiter specified this error is triggered first - let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("doggo"), "").await; + let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() { "###); let (response, code) = index - .raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))) + .raw_add_documents( + "", + vec![("Content-Type", "application/json")], + &format!("?csvDelimiter={}", encode("🍰")), + ) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" @@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), 
@r###" { From eb90f0b4fbf2ae1da9d9461f4480b764f59745bd Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 22 Feb 2024 18:42:12 +0100 Subject: [PATCH 87/87] fix and remove the file-store hack of /dev/null --- file-store/src/lib.rs | 56 +++++++++------------ index-scheduler/src/lib.rs | 54 ++++++++++---------- meilisearch-types/src/document_formats.rs | 16 +++--- meilisearch/src/routes/indexes/documents.rs | 8 ++- 4 files changed, 63 insertions(+), 71 deletions(-) diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index e3851a2df..15c4168bc 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -1,5 +1,5 @@ use std::fs::File as StdFile; -use std::ops::{Deref, DerefMut}; +use std::io::Write; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -22,20 +22,6 @@ pub enum Error { pub type Result = std::result::Result; -impl Deref for File { - type Target = NamedTempFile; - - fn deref(&self) -> &Self::Target { - &self.file - } -} - -impl DerefMut for File { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.file - } -} - #[derive(Clone, Debug)] pub struct FileStore { path: PathBuf, @@ -56,7 +42,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); - let update_file = File { dry: false, file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -67,7 +53,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::from_u128(uuid); let path = self.path.join(uuid.to_string()); - let update_file = File { dry: false, file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -135,33 +121,41 @@ impl FileStore { } pub struct File { - dry: bool, path: PathBuf, - file: NamedTempFile, + file: Option, } impl File { pub fn dry_file() -> Result { - #[cfg(target_family = "unix")] - let path = PathBuf::from_str("/dev/null").unwrap(); - #[cfg(target_family = "windows")] - let path = PathBuf::from_str("\\Device\\Null").unwrap(); - - Ok(Self { - dry: true, - path: path.clone(), - file: tempfile::Builder::new().make(|_| std::fs::File::create(path.clone()))?, - }) + Ok(Self { path: PathBuf::new(), file: None }) } pub fn persist(self) -> Result<()> { - if !self.dry { - self.file.persist(&self.path)?; + if let Some(file) = self.file { + file.persist(&self.path)?; } Ok(()) } } +impl Write for File { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if let Some(file) = self.file.as_mut() { + file.write(buf) + } else { + Ok(buf.len()) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + if let Some(file) = self.file.as_mut() { + file.flush() + } else { + Ok(()) + } + } +} + #[cfg(test)] mod test { use std::io::Write; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 5d0ce9eb9..1c3b93bce 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1550,7 +1550,7 @@ impl<'a> Dump<'a> { let content_uuid = match content_file { Some(content_file) if task.status == Status::Enqueued => { let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; - let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); + let mut builder = DocumentsBatchBuilder::new(&mut file); for doc in content_file { builder.append_json_object(&doc?)?; } @@ -1734,7 +1734,7 @@ pub struct IndexStats { #[cfg(test)] mod tests { - use std::io::{BufWriter, Seek, Write}; + use std::io::{BufWriter, Write}; use std::time::Instant; use big_s::S; @@ -1882,7 +1882,7 @@ mod tests { /// 
Adapting to the new json reading interface
     pub fn read_json(
         bytes: &[u8],
-        write: impl Write + Seek,
+        write: impl Write,
     ) -> std::result::Result<u64, DocumentFormatError> {
         let temp_file = NamedTempFile::new().unwrap();
         let mut buffer = BufWriter::new(temp_file.reopen().unwrap());
@@ -1909,7 +1909,7 @@ mod tests {
         );
 
         let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         (file, documents_count)
     }
 
@@ -2321,7 +2321,7 @@ mod tests {
         }"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -2366,7 +2366,7 @@ mod tests {
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -2406,7 +2406,7 @@ mod tests {
         ]"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -2472,7 +2472,7 @@ mod tests {
         ]"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -2678,7 +2678,7 @@ mod tests {
         }"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -2852,7 +2852,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -2903,7 +2903,7 @@ mod tests {
            );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -2956,7 +2956,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3010,7 +3010,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3065,7 +3065,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3567,7 +3567,7 @@ mod tests {
         }"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -3609,7 +3609,7 @@ mod tests {
         }"#;
 
         let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
-        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
         file.persist().unwrap();
         index_scheduler
             .register(
@@ -3669,7 +3669,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3721,7 +3721,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3783,7 +3783,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3850,7 +3850,7 @@ mod tests {
             );
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3922,7 +3922,7 @@ mod tests {
             let allow_index_creation = i % 2 != 0;
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -3979,7 +3979,7 @@ mod tests {
             let allow_index_creation = i % 2 != 0;
 
             let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             file.persist().unwrap();
             index_scheduler
                 .register(
@@ -4033,7 +4033,7 @@ mod tests {
             );
             let (uuid, mut file) =
                 index_scheduler.create_update_file_with_uuid(id as u128).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             assert_eq!(documents_count, 1);
             file.persist().unwrap();
 
@@ -4098,7 +4098,7 @@ mod tests {
             );
             let (uuid, mut file) =
                 index_scheduler.create_update_file_with_uuid(id as u128).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             assert_eq!(documents_count, 1);
             file.persist().unwrap();
 
@@ -4159,7 +4159,7 @@ mod tests {
             );
             let (uuid, mut file) =
                 index_scheduler.create_update_file_with_uuid(id as u128).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             assert_eq!(documents_count, 1);
             file.persist().unwrap();
 
@@ -4244,7 +4244,7 @@ mod tests {
             );
             let (uuid, mut file) =
                 index_scheduler.create_update_file_with_uuid(id as u128).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             assert_eq!(documents_count, 1);
             file.persist().unwrap();
 
@@ -4331,7 +4331,7 @@ mod tests {
             );
             let (uuid, mut file) =
                 index_scheduler.create_update_file_with_uuid(id as u128).unwrap();
-            let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+            let documents_count = read_json(content.as_bytes(), &mut file).unwrap();
             assert_eq!(documents_count, 1);
             file.persist().unwrap();
 
diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs
index 0f1d995f9..50dc5bad4 100644
--- a/meilisearch-types/src/document_formats.rs
+++ b/meilisearch-types/src/document_formats.rs
@@ -1,6 +1,6 @@
 use std::fmt::{self, Debug, Display};
 use std::fs::File;
-use std::io::{self, Seek, Write};
+use std::io::{self, BufWriter, Write};
 use std::marker::PhantomData;
 
 use memmap2::MmapOptions;
@@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError {
 }
 
 /// Reads CSV from input and write an obkv batch to writer.
-pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };
     let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
     builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
@@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result
     Ok(count as u64)
 }
 
-/// Reads JSON from temporary file and write an obkv batch to writer.
-pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+/// Reads JSON from temporary file and write an obkv batch to writer.
+pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };
 
     let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
@@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
 }
 
 /// Reads JSON from temporary file and write an obkv batch to writer.
-pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<u64> {
-    let mut builder = DocumentsBatchBuilder::new(writer);
+pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
+    let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
     let mmap = unsafe { MmapOptions::new().map(file)? };
 
     for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {
diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs
index a74bbff49..43fab1dae 100644
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@@ -425,11 +425,9 @@ async fn document_addition(
     let read_file = buffer.into_inner().into_std().await;
     let documents_count = tokio::task::spawn_blocking(move || {
         let documents_count = match format {
-            PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
-            PayloadType::Csv { delimiter } => {
-                read_csv(&read_file, update_file.as_file_mut(), delimiter)?
-            }
-            PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
+            PayloadType::Json => read_json(&read_file, &mut update_file)?,
+            PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
+            PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
         };
         // we NEED to persist the file here because we moved the `udpate_file` in another task.
         update_file.persist()?;