diff --git a/BENCHMARKS.md b/BENCHMARKS.md index e1d0c5feb..6d2cc6100 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -109,6 +109,12 @@ They are JSON files with the following structure (comments are not actually supp "run_count": 3, // List of arguments to add to the Meilisearch command line. "extra_cli_args": ["--max-indexing-threads=1"], + // An expression that can be parsed as a comma-separated list of targets and levels + // as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples). + // The expression is used to filter the spans that are measured for profiling purposes. + // Optional, defaults to "indexing::=trace" (for indexing workloads); another common value is + // "search::=trace" (for search workloads). + "target": "indexing::=trace", // List of named assets that can be used in the commands. "assets": { // name of the asset. diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index a3a4b48a3..375060889 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -752,10 +752,15 @@ fn prepare_search<'t>( SearchKind::SemanticOnly { embedder_name, embedder } => { let vector = match query.vector.clone() { Some(vector) => vector, - None => embedder - .embed_one(query.q.clone().unwrap()) - .map_err(milli::vector::Error::from) - .map_err(milli::Error::from)?, + None => { + let span = tracing::trace_span!(target: "search::vector", "embed_one"); + let _entered = span.enter(); + + embedder + .embed_one(query.q.clone().unwrap()) + .map_err(milli::vector::Error::from) + .map_err(milli::Error::from)? 
+ } }; search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index f7e1aa492..2102bf479 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -17,6 +17,7 @@ struct ScoreWithRatioResult { type ScoreWithRatio = (Vec, f32); +#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")] fn compare_scores( &(ref left_scores, left_ratio): &ScoreWithRatio, &(ref right_scores, right_ratio): &ScoreWithRatio, @@ -84,6 +85,7 @@ impl ScoreWithRatioResult { } } + #[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")] fn merge( vector_results: Self, keyword_results: Self, @@ -150,6 +152,7 @@ impl ScoreWithRatioResult { } impl<'a> Search<'a> { + #[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")] pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option)> { // TODO: find classier way to achieve that than to reset vector and query params // create separate keyword and semantic searches @@ -194,6 +197,9 @@ impl<'a> Search<'a> { Some(vector_query) => vector_query, None => { // attempt to embed the vector + let span = tracing::trace_span!(target: "search::hybrid", "embed_one"); + let _entered = span.enter(); + match embedder.embed_one(query) { Ok(embedding) => embedding, Err(error) => { diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 9255e4c09..8f1deb265 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -213,9 +213,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( continue; } - let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id()); - let entered = span.enter(); - let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( ctx, logger, @@ -225,7 +222,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( 
back!(); continue; }; - drop(entered); ranking_rule_scores.push(next_bucket.score); diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index 41b70ae39..d270c4847 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -27,6 +27,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute { "exact_attribute".to_owned() } + #[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")] fn start_iteration( &mut self, ctx: &mut SearchContext<'ctx>, @@ -38,6 +39,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute { Ok(()) } + #[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")] fn next_bucket( &mut self, _ctx: &mut SearchContext<'ctx>, @@ -51,6 +53,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute { Ok(output) } + #[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")] fn end_iteration( &mut self, _ctx: &mut SearchContext<'ctx>, diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs index 4081c9637..dc09ede99 100644 --- a/milli/src/search/new/geo_sort.rs +++ b/milli/src/search/new/geo_sort.rs @@ -209,6 +209,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { "geo_sort".to_owned() } + #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] fn start_iteration( &mut self, ctx: &mut SearchContext<'ctx>, @@ -234,6 +235,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { Ok(()) } + #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] #[allow(clippy::only_used_in_recursion)] fn next_bucket( &mut self, @@ -285,6 +287,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { self.next_bucket(ctx, logger, universe) } + #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] fn end_iteration(&mut self, _ctx: &mut 
SearchContext<'ctx>, _logger: &mut dyn SearchLogger) { // we do not reset the rtree here, it could be used in a next iteration self.query = None; diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index b066f82bd..fabfd1fdf 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -127,6 +127,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase fn id(&self) -> String { self.id.clone() } + + #[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")] fn start_iteration( &mut self, ctx: &mut SearchContext<'ctx>, @@ -209,6 +211,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase Ok(()) } + #[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")] fn next_bucket( &mut self, ctx: &mut SearchContext<'ctx>, @@ -358,6 +361,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score })) } + #[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")] fn end_iteration( &mut self, _ctx: &mut SearchContext<'ctx>, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 25ce482d3..4cecb19e5 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -212,7 +212,7 @@ fn resolve_maximally_reduced_query_graph( Ok(docids) } -#[tracing::instrument(level = "trace", skip_all, target = "search")] +#[tracing::instrument(level = "trace", skip_all, target = "search::universe")] fn resolve_universe( ctx: &mut SearchContext, initial_universe: &RoaringBitmap, @@ -229,7 +229,7 @@ fn resolve_universe( ) } -#[tracing::instrument(level = "trace", skip_all, target = "search")] +#[tracing::instrument(level = "trace", skip_all, target = "search::query")] fn resolve_negative_words( ctx: &mut SearchContext, 
negative_words: &[Word], @@ -243,7 +243,7 @@ fn resolve_negative_words( Ok(negative_bitmap) } -#[tracing::instrument(level = "trace", skip_all, target = "search")] +#[tracing::instrument(level = "trace", skip_all, target = "search::query")] fn resolve_negative_phrases( ctx: &mut SearchContext, negative_phrases: &[LocatedQueryTerm], @@ -548,7 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( Ok(()) } -#[tracing::instrument(level = "trace", skip_all, target = "search")] +#[tracing::instrument(level = "trace", skip_all, target = "search::universe")] pub fn filtered_universe( index: &Index, txn: &RoTxn<'_>, @@ -620,7 +620,7 @@ pub fn execute_vector_search( } #[allow(clippy::too_many_arguments)] -#[tracing::instrument(level = "trace", skip_all, target = "search")] +#[tracing::instrument(level = "trace", skip_all, target = "search::main")] pub fn execute_search( ctx: &mut SearchContext, query: Option<&str>, diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index c5e58c635..e4cb335d7 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -44,6 +44,7 @@ fn compute_docids( impl RankingRuleGraphTrait for ExactnessGraph { type Condition = ExactnessCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::exactness")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -71,6 +72,7 @@ impl RankingRuleGraphTrait for ExactnessGraph { }) } + #[tracing::instrument(level = "trace", skip_all, target = "search::exactness")] fn build_edges( _ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -86,6 +88,7 @@ impl RankingRuleGraphTrait for ExactnessGraph { Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)]) } + #[tracing::instrument(level = "trace", skip_all, target = "search::exactness")] fn rank_to_score(rank: 
Rank) -> ScoreDetails { ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank)) } diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index a4a08ea46..9cd7e8ee4 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -20,6 +20,7 @@ pub enum FidGraph {} impl RankingRuleGraphTrait for FidGraph { type Condition = FidCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::fid")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -44,6 +45,7 @@ impl RankingRuleGraphTrait for FidGraph { }) } + #[tracing::instrument(level = "trace", skip_all, target = "search::fid")] fn build_edges( ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -101,6 +103,7 @@ impl RankingRuleGraphTrait for FidGraph { Ok(edges) } + #[tracing::instrument(level = "trace", skip_all, target = "search::fid")] fn rank_to_score(rank: Rank) -> ScoreDetails { ScoreDetails::Fid(rank) } diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index 646ff954a..daf227f31 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -20,6 +20,7 @@ pub enum PositionGraph {} impl RankingRuleGraphTrait for PositionGraph { type Condition = PositionCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::position")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -44,6 +45,7 @@ impl RankingRuleGraphTrait for PositionGraph { }) } + #[tracing::instrument(level = "trace", skip_all, target = "search::position")] fn build_edges( ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -117,6 +119,7 @@ impl RankingRuleGraphTrait for PositionGraph { Ok(edges) } + #[tracing::instrument(level = "trace", 
skip_all, target = "search::position")] fn rank_to_score(rank: Rank) -> ScoreDetails { ScoreDetails::Position(rank) } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 532ace626..faa43a930 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -21,6 +21,7 @@ pub enum ProximityGraph {} impl RankingRuleGraphTrait for ProximityGraph { type Condition = ProximityCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::proximity")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -29,6 +30,7 @@ impl RankingRuleGraphTrait for ProximityGraph { compute_docids::compute_docids(ctx, condition, universe) } + #[tracing::instrument(level = "trace", skip_all, target = "search::proximity")] fn build_edges( ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -38,6 +40,7 @@ impl RankingRuleGraphTrait for ProximityGraph { build::build_edges(ctx, conditions_interner, source_term, dest_term) } + #[tracing::instrument(level = "trace", skip_all, target = "search::proximity")] fn rank_to_score(rank: Rank) -> ScoreDetails { ScoreDetails::Proximity(rank) } diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index 035106ac3..225782168 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -19,6 +19,7 @@ pub enum TypoGraph {} impl RankingRuleGraphTrait for TypoGraph { type Condition = TypoCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::typo")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -37,6 +38,7 @@ impl RankingRuleGraphTrait for TypoGraph { }) } + #[tracing::instrument(level = "trace", skip_all, target = "search::typo")] fn build_edges( ctx: &mut 
SearchContext, conditions_interner: &mut DedupInterner, @@ -77,6 +79,7 @@ impl RankingRuleGraphTrait for TypoGraph { Ok(edges) } + #[tracing::instrument(level = "trace", skip_all, target = "search::typo")] fn rank_to_score(rank: Rank) -> ScoreDetails { ScoreDetails::Typo(score_details::Typo::from_rank(rank)) } diff --git a/milli/src/search/new/ranking_rule_graph/words/mod.rs b/milli/src/search/new/ranking_rule_graph/words/mod.rs index 45a56829f..43542c81e 100644 --- a/milli/src/search/new/ranking_rule_graph/words/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/words/mod.rs @@ -18,6 +18,7 @@ pub enum WordsGraph {} impl RankingRuleGraphTrait for WordsGraph { type Condition = WordsCondition; + #[tracing::instrument(level = "trace", skip_all, target = "search::words")] fn resolve_condition( ctx: &mut SearchContext, condition: &Self::Condition, @@ -36,6 +37,7 @@ impl RankingRuleGraphTrait for WordsGraph { }) } + #[tracing::instrument(level = "trace", skip_all, target = "search::words")] fn build_edges( _ctx: &mut SearchContext, conditions_interner: &mut DedupInterner, @@ -45,6 +47,7 @@ impl RankingRuleGraphTrait for WordsGraph { Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))]) } + #[tracing::instrument(level = "trace", skip_all, target = "search::words")] fn rank_to_score(rank: Rank) -> ScoreDetails { ScoreDetails::Words(score_details::Words::from_rank(rank)) } diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index fb234b293..d95a899b7 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -88,6 +88,8 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, let Self { field_name, is_ascending, .. 
} = self; format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" }) } + + #[tracing::instrument(level = "trace", skip_all, target = "search::sort")] fn start_iteration( &mut self, ctx: &mut SearchContext<'ctx>, @@ -186,6 +188,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Ok(()) } + #[tracing::instrument(level = "trace", skip_all, target = "search::sort")] fn next_bucket( &mut self, _ctx: &mut SearchContext<'ctx>, @@ -211,6 +214,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, } } + #[tracing::instrument(level = "trace", skip_all, target = "search::sort")] fn end_iteration( &mut self, _ctx: &mut SearchContext<'ctx>, diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index cd69b6c47..c227c7c3f 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -73,6 +73,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { "vector_sort".to_owned() } + #[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")] fn start_iteration( &mut self, ctx: &mut SearchContext<'ctx>, @@ -89,6 +90,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { } #[allow(clippy::only_used_in_recursion)] + #[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")] fn next_bucket( &mut self, ctx: &mut SearchContext<'ctx>, @@ -139,6 +141,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort { self.next_bucket(ctx, _logger, universe) } + #[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")] fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger) { self.query = None; } diff --git a/workloads/search/embeddings-movies-subset-hf.json b/workloads/search/embeddings-movies-subset-hf.json new file mode 100644 index 000000000..aeeecac59 --- /dev/null +++ 
b/workloads/search/embeddings-movies-subset-hf.json @@ -0,0 +1,171 @@ +{ + "name": "search-movies-subset-hf-embeddings", + "run_count": 2, + "target": "search::=trace", + "extra_cli_args": [ + "--max-indexing-threads=4" + ], + "assets": { + "movies-100.json": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json", + "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" + } + }, + "precommands": [ + { + "route": "experimental-features", + "method": "PATCH", + "body": { + "inline": { + "vectorStore": true + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "embedders": { + "default": { + "source": "huggingFace", + "documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}" + } + } + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies-100.json" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "puppy cute comforting movie", + "limit": 100, + "hybrid": { + "semanticRatio": 0.1 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "puppy cute comforting movie", + "limit": 100, + "hybrid": { + "semanticRatio": 0.5 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { 
+ "inline": { + "q": "puppy cute comforting movie", + "limit": 100, + "hybrid": { + "semanticRatio": 0.9 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "puppy cute comforting movie", + "limit": 100, + "hybrid": { + "semanticRatio": 1.0 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "shrek", + "limit": 100, + "hybrid": { + "semanticRatio": 1.0 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "shrek", + "limit": 100, + "hybrid": { + "semanticRatio": 0.5 + } + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "shrek", + "limit": 100, + "hybrid": { + "semanticRatio": 0.1 + } + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/workloads/search/filterable-movies.json b/workloads/search/filterable-movies.json new file mode 100644 index 000000000..5585c6eaf --- /dev/null +++ b/workloads/search/filterable-movies.json @@ -0,0 +1,94 @@ +{ + "name": "search-filterable-movies.json", + "run_count": 10, + "target": "search::=trace", + "extra_cli_args": [], + "assets": { + "movies.json": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json", + "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "DontWait" + }, + { + "route": 
"indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies.json" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "", + "limit": 100, + "filter": "genres IN [action, comedy, adventure] AND release_date = 233366400" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "Batman returns", + "limit": 100, + "filter": "genres IN [action, comedy, adventure] AND release_date > 233366400" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "the", + "limit": 100, + "filter": "genres IN [animation, comedy, adventure] AND release_date < 233366400" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "t", + "limit": 100, + "filter": "genres = Family AND release_date <= 233366400 OR release_date >= 1054252800" + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/workloads/search/geosort.json b/workloads/search/geosort.json new file mode 100644 index 000000000..00788f70b --- /dev/null +++ b/workloads/search/geosort.json @@ -0,0 +1,340 @@ +{ + "name": "search-geosort.jsonl_1M", + "run_count": 3, + "target": "search::=trace", + "extra_cli_args": [], + "assets": { + "smol-all-countries-100k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-100k.jsonl", + "sha256": "d00924689abc02d09ec4667cc5a18364ff7bc236bad51367f34b9184b945ece3" + }, + "smol-all-countries-200k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": 
"https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-200k.jsonl", + "sha256": "2a215b43b35d596d9da4f1071deab9002a93602e6dbf1308fba53eb89d9c5a9e" + }, + "smol-all-countries-300k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-300k.jsonl", + "sha256": "91d94d78eeb10d631557a5ccf775e74a41d14ccaff4d7121dd90c7aa35534f2b" + }, + "smol-all-countries-400k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-400k.jsonl", + "sha256": "ee883a353b571f35f4abb79b95cfa628f3f1c582919dd658a388b220f97fe035" + }, + "smol-all-countries-500k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-500k.jsonl", + "sha256": "5be254ce4c50db12b7f1795859b8bbdcbc2ec22bccb3a1898899bd4c4765a1bf" + }, + "smol-all-countries-600k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-600k.jsonl", + "sha256": "3aa91afe3361f5185c142125dfcdc8ddcb7d39fdeeeb4f5e67439511905e9826" + }, + "smol-all-countries-700k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-700k.jsonl", + "sha256": "5a864a1e9d89736147a8da594e2cbce5264979326d38655d0945d8447f3867b3" + }, + "smol-all-countries-800k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-800k.jsonl", + "sha256": 
"d85eb9c85a612fd7b77623e162ecd0f8265ba3be97054e26b9cff7c48735809b" + }, + "smol-all-countries-900k.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-900k.jsonl", + "sha256": "4fd6662e8b9bfcd9fad7d5dcd691a47ec985d810d1e340465c056ee84e9c40f3" + }, + "smol-all-countries-1M.jsonl": { + "local_location": null, + "format": "NdJson", + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-1M.jsonl", + "sha256": "585a713b489b154b94e7c07707bd369f888c7fe24eb90bf604578d7adf51a9e6" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "geonameid", + "name", + "asciiname", + "alternatenames", + "_geo", + "population" + ], + "searchableAttributes": [ + "name", + "alternatenames", + "elevation" + ], + "filterableAttributes": [ + "_geo", + "population", + "elevation" + ], + "sortableAttributes": [ + "_geo", + "population", + "elevation" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-100k.jsonl" + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-200k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-300k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-400k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-500k.jsonl" + }, + "synchronous": 
"WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-600k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-700k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-800k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-900k.jsonl" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "smol-all-countries-1M.jsonl" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "", + "limit": 100 + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "sort": [ + "_geoPoint(50.62999333378238, 3.086269263384099):asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "sort": [ + "_geoPoint(50.62999333378238, 3.086269263384099):desc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "sort": [ + "_geoPoint(35.749512532692144, 139.61664952543356):asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "sort": [ + "_geoPoint(35.749512532692144, 139.61664952543356):desc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + 
"limit": 100, + "sort": [ + "_geoPoint(-48.87561645055408, -123.39275749319793):asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "sort": [ + "_geoPoint(-48.87561645055408, -123.39275749319793):desc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(50.62999333378238, 3.086269263384099, 100000)" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(50.62999333378238, 3.086269263384099, 1000)" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(35.749512532692144, 139.61664952543356, 100000)" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(35.749512532692144, 139.61664952543356, 1000)" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 100000)" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 1000)" + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/workloads/search/hackernews.json b/workloads/search/hackernews.json new file mode 100644 index 000000000..8d0a4b9b7 --- /dev/null +++ b/workloads/search/hackernews.json @@ -0,0 +1,255 @@ +{ + "name": "search-hackernews.ndjson_1M", + "run_count": 3, + 
"target": "search::=trace", + "extra_cli_args": [], + "assets": { + "hackernews-100_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson", + "sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213" + }, + "hackernews-200_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson", + "sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685" + }, + "hackernews-300_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson", + "sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2" + }, + "hackernews-400_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson", + "sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7" + }, + "hackernews-500_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson", + "sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083" + }, + "hackernews-600_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson", + "sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe" + }, + "hackernews-700_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson", + "sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b" + }, + 
"hackernews-800_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson", + "sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546" + }, + "hackernews-900_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson", + "sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9" + }, + "hackernews-1_000_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson", + "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time" + ], + "searchableAttributes": [ + "title" + ], + "filterableAttributes": [ + "by" + ], + "sortableAttributes": [ + "score", + "time" + ], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-100_000.ndjson" + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-200_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-300_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-400_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": 
"POST", + "body": { + "asset": "hackernews-500_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-600_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-700_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-800_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-900_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-1_000_000.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "rust meilisearch", + "limit": 100, + "filter": "by = tpayet", + "sort": [ + "score:desc", + "time:asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "rust meilisearch", + "limit": 100, + "filter": "NOT by = tpayet", + "sort": [ + "score:desc", + "time:asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "meilisearch", + "limit": 100, + "sort": [ + "score:desc", + "time:desc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "rust", + "limit": 100, + "filter": "by = dang", + "sort": [ + "score:desc", + "time:asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "combinator YC", + "limit": 
100, + "filter": "by = dang", + "sort": [ + "score:desc", + "time:asc" + ] + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/workloads/search/movies.json b/workloads/search/movies.json new file mode 100644 index 000000000..939fe6111 --- /dev/null +++ b/workloads/search/movies.json @@ -0,0 +1,90 @@ +{ + "name": "search-movies.json", + "run_count": 10, + "target": "search::=trace", + "extra_cli_args": [], + "assets": { + "movies.json": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json", + "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies.json" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "", + "limit": 100 + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "Batman returns", + "limit": 100 + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "q": "the" + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "limit": 100, + "q": "t" + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/workloads/search/sortable-movies.json b/workloads/search/sortable-movies.json new 
file mode 100644 index 000000000..5174b0ec1 --- /dev/null +++ b/workloads/search/sortable-movies.json @@ -0,0 +1,110 @@ +{ + "name": "search-sortable-movies.json", + "run_count": 10, + "target": "search::=trace", + "extra_cli_args": [], + "assets": { + "movies.json": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json", + "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "searchCutoffMs": 15000 + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies.json" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "", + "limit": 100, + "sort": [ + "release_date:asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "Batman returns", + "limit": 100, + "sort": [ + "release_date:desc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "the", + "limit": 100, + "sort": [ + "release_date:asc" + ] + } + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/search", + "method": "POST", + "body": { + "inline": { + "q": "t", + "limit": 100, + "sort": [ + "release_date:asc" + ] + } + }, + "synchronous": "WaitForResponse" + } + ] +} \ No newline at end of file diff --git a/xtask/src/bench/workload.rs 
b/xtask/src/bench/workload.rs index db44b5a8f..19c8bfae8 100644 --- a/xtask/src/bench/workload.rs +++ b/xtask/src/bench/workload.rs @@ -23,6 +23,8 @@ pub struct Workload { pub extra_cli_args: Vec, pub assets: BTreeMap, #[serde(default)] + pub target: String, + #[serde(default)] pub precommands: Vec, pub commands: Vec, } @@ -54,7 +56,7 @@ async fn run_commands( let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json"); let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json"); - let report_handle = start_report(logs_client, trace_filename).await?; + let report_handle = start_report(logs_client, trace_filename, &workload.target).await?; for batch in workload .commands @@ -160,7 +162,11 @@ async fn execute_run( async fn start_report( logs_client: &Client, filename: String, + target: &str, ) -> anyhow::Result>> { + const DEFAULT_TARGET: &str = "indexing::=trace"; + let target = if target.is_empty() { DEFAULT_TARGET } else { target }; + let report_file = std::fs::File::options() .create(true) .truncate(true) @@ -174,7 +180,7 @@ async fn start_report( .post("") .json(&json!({ "mode": "profile", - "target": "indexing::=trace" + "target": target, })) .send() .await