From 42577403d8676bd7702fdbe1b8616bc1846f0ad1 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Sun, 19 Feb 2023 14:40:25 +0100 Subject: [PATCH] Authentication: Directly pass the authfilter to the index scheduler --- Cargo.lock | 1 + index-scheduler/Cargo.toml | 1 + index-scheduler/src/lib.rs | 170 +++++++++++++++++++------------- meilisearch-auth/src/lib.rs | 43 +++----- meilisearch/src/routes/mod.rs | 2 +- meilisearch/src/routes/tasks.rs | 11 +-- 6 files changed, 127 insertions(+), 101 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2b0657da8..8da6a4b51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1904,6 +1904,7 @@ dependencies = [ "insta", "log", "meili-snap", + "meilisearch-auth", "meilisearch-types", "nelson", "page_size 0.5.0", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 77936c435..99dfaa493 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -19,6 +19,7 @@ dump = { path = "../dump" } enum-iterator = "1.1.3" file-store = { path = "../file-store" } log = "0.4.14" +meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" roaring = { version = "0.10.0", features = ["serde"] } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index d80131681..8c050a34f 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -43,7 +43,6 @@ use file_store::FileStore; use meilisearch_types::error::ResponseError; use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::{self, Database, Env, RoTxn}; -use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::milli; use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::update::IndexerConfig; @@ -630,13 +629,13 @@ impl IndexScheduler { &self, rtxn: &RoTxn, query: &Query, - authorized_indexes: &Option>, + filters: &meilisearch_auth::AuthFilter, ) -> Result { let mut tasks = self.get_task_ids(rtxn, query)?; // If the query contains a list of index uid or there is a finite list of authorized indexes, // then we must exclude all the kinds that aren't associated to one and only one index. - if query.index_uids.is_some() || authorized_indexes.is_some() { + if query.index_uids.is_some() || !filters.all_indexes_authorized() { for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { tasks -= self.get_kind(rtxn, kind)?; } @@ -644,11 +643,11 @@ impl IndexScheduler { // Any task that is internally associated with a non-authorized index // must be discarded. - if let Some(authorized_indexes) = authorized_indexes { + if !filters.all_indexes_authorized() { let all_indexes_iter = self.index_tasks.iter(rtxn)?; for result in all_indexes_iter { let (index, index_tasks) = result?; - if !authorized_indexes.iter().any(|p| p.matches_str(index)) { + if !filters.is_index_authorized(index) { tasks -= index_tasks; } } @@ -668,12 +667,11 @@ impl IndexScheduler { pub fn get_tasks_from_authorized_indexes( &self, query: Query, - authorized_indexes: Option>, + filters: &meilisearch_auth::AuthFilter, ) -> Result> { let rtxn = self.env.read_txn()?; - let tasks = - self.get_task_ids_from_authorized_indexes(&rtxn, &query, &authorized_indexes)?; + let tasks = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?; let tasks = self.get_existing_tasks( &rtxn, @@ -1120,7 +1118,9 @@ mod tests { use crossbeam::channel::RecvTimeoutError; use file_store::File; use meili_snap::snapshot; + use meilisearch_auth::AuthFilter; use meilisearch_types::document_formats::DocumentFormatError; + use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::milli::obkv_to_json; use meilisearch_types::milli::update::IndexDocumentsMethod::{ ReplaceDocuments, UpdateDocuments, @@ -2371,38 +2371,45 @@ mod tests { let rtxn = index_scheduler.env.read_txn().unwrap(); let query = Query { limit: Some(0), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[]"); let query = Query { limit: Some(1), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[2,]"); let query = Query { limit: Some(2), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); let query = Query { from: Some(1), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); let query = Query { from: Some(2), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); let query = Query { from: Some(1), limit: Some(1), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[1,]"); let query = Query { from: Some(1), limit: Some(2), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); } @@ -2427,21 +2434,24 @@ mod tests { let rtxn = index_scheduler.env.read_txn().unwrap(); let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick let query = Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick let query = Query { @@ -2449,8 +2459,9 @@ mod tests { after_started_at: Some(start_time), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both enqueued and processing tasks in the first tick, but limited to those with a started_at // that comes after the start of the test, which should excludes the enqueued tasks snapshot!(snapshot_bitmap(&tasks), @"[0,]"); @@ -2460,8 +2471,9 @@ mod tests { before_started_at: Some(start_time), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both enqueued and processing tasks in the first tick, but limited to those with a started_at // that comes before the start of the test, which should excludes all of them snapshot!(snapshot_bitmap(&tasks), @"[]"); @@ -2472,8 +2484,9 @@ mod tests { before_started_at: Some(start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both enqueued and processing tasks in the first tick, but limited to those with a started_at // that comes after the start of the test and before one minute after the start of the test, // which should exclude the enqueued tasks and include the only processing task @@ -2498,8 +2511,9 @@ mod tests { before_started_at: Some(start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both succeeded and processing tasks in the first tick, but limited to those with a started_at // that comes after the start of the test and before one minute after the start of the test, // which should include all tasks @@ -2510,8 +2524,9 @@ mod tests { before_started_at: Some(start_time), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both succeeded and processing tasks in the first tick, but limited to those with a started_at // that comes before the start of the test, which should exclude all tasks snapshot!(snapshot_bitmap(&tasks), @"[]"); @@ -2522,8 +2537,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // both succeeded and processing tasks in the first tick, but limited to those with a started_at // that comes after the start of the second part of the test and before one minute after the // second start of the test, which should exclude all tasks @@ -2541,8 +2557,9 @@ mod tests { let rtxn = index_scheduler.env.read_txn().unwrap(); - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // we run the same query to verify that, and indeed find that the last task is matched snapshot!(snapshot_bitmap(&tasks), @"[2,]"); @@ -2552,8 +2569,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // enqueued, succeeded, or processing tasks started after the second part of the test, should // again only return the last task snapshot!(snapshot_bitmap(&tasks), @"[2,]"); @@ -2563,8 +2581,9 @@ mod tests { // now the last task should have failed snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // so running the last query should return nothing snapshot!(snapshot_bitmap(&tasks), @"[]"); @@ -2574,8 +2593,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // but the same query on failed tasks should return the last task snapshot!(snapshot_bitmap(&tasks), @"[2,]"); @@ -2585,8 +2605,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // but the same query on failed tasks should return the last task snapshot!(snapshot_bitmap(&tasks), @"[2,]"); @@ -2597,8 +2618,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // same query but with an invalid uid snapshot!(snapshot_bitmap(&tasks), @"[]"); @@ -2609,8 +2631,9 @@ mod tests { before_started_at: Some(second_start_time + Duration::minutes(1)), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // same query but with a valid uid snapshot!(snapshot_bitmap(&tasks), @"[2,]"); } @@ -2640,8 +2663,9 @@ mod tests { let rtxn = index_scheduler.env.read_txn().unwrap(); let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // only the first task associated with catto is returned, the indexSwap tasks are excluded! snapshot!(snapshot_bitmap(&tasks), @"[0,]"); @@ -2650,7 +2674,9 @@ mod tests { .get_task_ids_from_authorized_indexes( &rtxn, &query, - &Some(vec![IndexUidPattern::new_unchecked("doggo")]), + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), ) .unwrap(); // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks @@ -2662,7 +2688,9 @@ mod tests { .get_task_ids_from_authorized_indexes( &rtxn, &query, - &Some(vec![IndexUidPattern::new_unchecked("doggo")]), + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), ) .unwrap(); // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks @@ -2674,10 +2702,14 @@ mod tests { .get_task_ids_from_authorized_indexes( &rtxn, &query, - &Some(vec![ - IndexUidPattern::new_unchecked("catto"), - IndexUidPattern::new_unchecked("doggo"), - ]), + &AuthFilter::with_allowed_indexes( + vec![ + IndexUidPattern::new_unchecked("catto"), + IndexUidPattern::new_unchecked("doggo"), + ] + .into_iter() + .collect(), + ), ) .unwrap(); // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks @@ -2685,8 +2717,9 @@ mod tests { snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); let query = Query::default(); - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // we asked for all the tasks with all index authorized -> all tasks returned snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]"); } @@ -2717,8 +2750,9 @@ mod tests { let rtxn = index_scheduler.read_txn().unwrap(); let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() }; - let tasks = - index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + let tasks = index_scheduler + .get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default()) + .unwrap(); // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the // taskCancelation itself snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); @@ -2728,7 +2762,9 @@ mod tests { .get_task_ids_from_authorized_indexes( &rtxn, &query, - &Some(vec![IndexUidPattern::new_unchecked("doggo")]), + &AuthFilter::with_allowed_indexes( + vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(), + ), ) .unwrap(); // Return only 1 because the user is not authorized to see task 2 diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index a7a350b40..66c26c00c 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -178,6 +178,19 @@ impl Default for AuthFilter { } impl AuthFilter { + pub fn with_allowed_indexes(allowed_indexes: HashSet) -> Self { + Self { + search_rules: SearchRules::Set(allowed_indexes.clone()), + key_authorized_indexes: SearchRules::Set(allowed_indexes), + allow_index_creation: false, + } + } + + pub fn all_indexes_authorized(&self) -> bool { + self.key_authorized_indexes.all_indexes_authorized() + && self.search_rules.all_indexes_authorized() + } + pub fn is_index_authorized(&self, index: &str) -> bool { self.key_authorized_indexes.is_index_authorized(index) && self.search_rules.is_index_authorized(index) @@ -189,16 +202,6 @@ impl AuthFilter { } self.search_rules.get_index_search_rules(index) } - - /// Return the list of indexes such that `self.is_index_authorized(index) == true`, - /// or `None` if all indexes satisfy this condition. - /// - /// FIXME: this works only when there are no tenant tokens, otherwise it ignores the rules of the API key. - /// - /// It is better to use `is_index_authorized` when possible. - pub fn authorized_indexes(&self) -> Option> { - self.search_rules.authorized_indexes() - } } /// Transparent wrapper around a list of allowed indexes with the search rules to apply for each. @@ -250,24 +253,10 @@ impl SearchRules { } } - /// Return the list of indexes such that `self.is_index_authorized(index) == true`, - /// or `None` if all indexes satisfy this condition. - fn authorized_indexes(&self) -> Option> { + fn all_indexes_authorized(&self) -> bool { match self { - SearchRules::Set(set) => { - if set.contains("*") { - None - } else { - Some(set.iter().cloned().collect()) - } - } - SearchRules::Map(map) => { - if map.contains_key("*") { - None - } else { - Some(map.keys().cloned().collect()) - } - } + SearchRules::Set(set) => set.contains("*"), + SearchRules::Map(map) => map.contains_key("*"), } } } diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 20b9f73f9..27eecebbc 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -255,7 +255,7 @@ pub fn create_all_stats( let mut database_size = 0; let processing_task = index_scheduler.get_tasks_from_authorized_indexes( Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() }, - filters.authorized_indexes(), + filters, )?; // accumulate the size of each indexes let processing_index = processing_task.first().and_then(|task| task.index_uid()); diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index e0252317b..1356ff722 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -319,7 +319,7 @@ async fn cancel_tasks( let tasks = index_scheduler.get_task_ids_from_authorized_indexes( &index_scheduler.read_txn()?, &query, - &index_scheduler.filters().authorized_indexes(), + index_scheduler.filters(), )?; let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; @@ -364,7 +364,7 @@ async fn delete_tasks( let tasks = index_scheduler.get_task_ids_from_authorized_indexes( &index_scheduler.read_txn()?, &query, - &index_scheduler.filters().authorized_indexes(), + index_scheduler.filters(), )?; let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; @@ -398,7 +398,7 @@ async fn get_tasks( let query = params.into_query(); let mut tasks_results: Vec = index_scheduler - .get_tasks_from_authorized_indexes(query, index_scheduler.filters().authorized_indexes())? + .get_tasks_from_authorized_indexes(query, index_scheduler.filters())? .into_iter() .map(|t| TaskView::from_task(&t)) .collect(); @@ -436,9 +436,8 @@ async fn get_task( let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; - if let Some(task) = index_scheduler - .get_tasks_from_authorized_indexes(query, index_scheduler.filters().authorized_indexes())? - .first() + if let Some(task) = + index_scheduler.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?.first() { let task_view = TaskView::from_task(task); Ok(HttpResponse::Ok().json(task_view))