From 1208daf1aa0df8e9558cf787a4200c3692dfe13d Mon Sep 17 00:00:00 2001 From: Luke Williams <108728588+luke-a-williams@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:14:27 +0000 Subject: [PATCH 1/7] added the complained about error from yesterday's airflow logs --- .../modules/ap_airflow_load_data_iam_role/main.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf index d6cc1b6fea8..cb43a375aa6 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf @@ -73,6 +73,12 @@ data "aws_iam_policy_document" "load_data" { "arn:aws:glue:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:table/${local.snake-database}*/*" ] } + statement { + sid = "GetDataAccessForLakeFormation${local.camel-sid}" + effect = "Allow" + actions = ["lakeformation:GetDataAccess"] + resources = ["*"] + } statement { sid = "ListAccountAlias${local.camel-sid}" effect = "Allow" From 795ef8e455fb6dd072765cc7288cc90d35e436d6 Mon Sep 17 00:00:00 2001 From: Luke Williams <108728588+luke-a-williams@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:43:35 +0000 Subject: [PATCH 2/7] added a suffix for the test environments since fms was failing --- .../modules/ap_airflow_load_data_iam_role/main.tf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf index cb43a375aa6..a129ed9a9bc 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf +++ 
b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf @@ -1,6 +1,7 @@ locals { camel-sid = join("", [for word in split("-", var.name) : title(word)]) - snake-database = replace(var.database_name, "-", "_") + suffix = var.environment == "test" ? "_test" : "" + snake-database = "${replace(var.database_name, "-", "_")}${local.suffix}" } data "aws_region" "current" {} From 69275b656bc476f5edf352706407d7a3ed7fca76 Mon Sep 17 00:00:00 2001 From: Luke Williams <108728588+luke-a-williams@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:02:58 +0000 Subject: [PATCH 3/7] not sure if double declaring, but adding the dual permissions into the module, and updating all uses of that module --- .../ap_airflow_iam.tf | 42 +++++++++++++++++++ .../modules/ap_airflow_iam_role/outputs.tf | 3 ++ .../ap_airflow_load_data_iam_role/main.tf | 10 ++++- .../variables.tf | 12 ++++++ 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/outputs.tf diff --git a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf index 72e15036243..6bb861fbe24 100644 --- a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf +++ b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf @@ -22,6 +22,9 @@ module "load_alcohol_monitoring_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "alcohol-monitoring" environment = local.environment database_name = "capita-alcohol-monitoring" @@ -37,6 +40,9 @@ module "load_orca_database" { count = local.is-production ? 
1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "orca" environment = local.environment database_name = "civica-orca" @@ -52,6 +58,9 @@ module "load_atrium_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "atrium" environment = local.environment database_name = "g4s-atrium" @@ -67,6 +76,9 @@ module "load_atv_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "atv" environment = local.environment database_name = "g4s-atv" @@ -82,6 +94,9 @@ module "load_cap_dw_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "cap-dw" environment = local.environment database_name = "g4s-cap-dw" @@ -98,6 +113,9 @@ module "load_emsys_mvp_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "emsys-mvp" environment = local.environment database_name = "g4s-emsys-mvp" @@ -114,6 +132,9 @@ module "load_fep_database" { count = local.is-production ? 
1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "fep" environment = local.environment database_name = "g4s-fep" @@ -129,6 +150,9 @@ module "load_rf_hours_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "rf-hours" environment = local.environment database_name = "g4s-rf-hours" @@ -144,6 +168,9 @@ module "load_subject_history_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "subject-history" environment = local.environment database_name = "g4s-subject-history" @@ -159,6 +186,9 @@ module "load_tasking_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "tasking" environment = local.environment database_name = "g4s-tasking" @@ -174,6 +204,9 @@ module "load_telephony_database" { count = local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "telephony" environment = local.environment database_name = "g4s-telephony" @@ -189,6 +222,9 @@ module "load_unstructured_atrium_database" { count = local.is-production ? 
1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "unstructured-atrium-database" environment = local.environment database_name = "g4s-atrium-unstructured" @@ -205,6 +241,9 @@ module "load_fms" { count = local.is-test || local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "fms" environment = local.environment database_name = "serco-fms" @@ -221,6 +260,9 @@ module "load_mdss" { count = local.is-test || local.is-production ? 1 : 0 source = "./modules/ap_airflow_load_data_iam_role" + data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn + de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) + name = "mdss" environment = local.environment database_name = "allied-mdss" diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/outputs.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/outputs.tf new file mode 100644 index 00000000000..d30de0f3f61 --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/outputs.tf @@ -0,0 +1,3 @@ +output "iam_role" { + value = aws_iam_role.role_ap_airflow +} \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf index a129ed9a9bc..bb7d4070427 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf @@ -94,7 +94,7 @@ data 
"aws_iam_policy_document" "load_data" { } } -module "load_unstructured_atrium_database" { +module "ap_database_sharing" { source = "../ap_airflow_iam_role" environment = var.environment @@ -105,3 +105,11 @@ module "load_unstructured_atrium_database" { oidc_arn = var.oidc_arn max_session_duration = var.max_session_duration } + +module "share_dbs_with_roles" { + source = "./modules/lakeformation_database_share" + dbs_to_grant = toset([local.snake-database]) + data_bucket_lf_resource = var.data_bucket_lf_resource + role_arn = module.ap_database_sharing.iam_role.arn + de_role_arn = var.de_role_arn +} \ No newline at end of file diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/variables.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/variables.tf index 1e3fedb4918..5bfe8133f64 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/variables.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/variables.tf @@ -50,3 +50,15 @@ variable "max_session_duration" { nullable = true default = 7200 } + +variable "de_role_arn" { + nullable = false + type = string + description = "The arn of the data engineering role" +} + +variable "data_bucket_lf_resource" { + nullable = false + type = string + description = "The arn of the LakeFormation resource where our parquet files are held" +} \ No newline at end of file From 791f01855fb267c4d4b52f541e3ff873f10046e6 Mon Sep 17 00:00:00 2001 From: matt-heery <116661071+matt-heery@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:07:39 +0000 Subject: [PATCH 4/7] repoint module --- .../modules/ap_airflow_load_data_iam_role/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf 
b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf index bb7d4070427..3723a54382b 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_load_data_iam_role/main.tf @@ -107,9 +107,9 @@ module "ap_database_sharing" { } module "share_dbs_with_roles" { - source = "./modules/lakeformation_database_share" + source = "../lakeformation_database_share" dbs_to_grant = toset([local.snake-database]) data_bucket_lf_resource = var.data_bucket_lf_resource role_arn = module.ap_database_sharing.iam_role.arn de_role_arn = var.de_role_arn -} \ No newline at end of file +} From f6959d660d9e5ae1dcd1e2c925a9c5efb3df9460 Mon Sep 17 00:00:00 2001 From: matt-heery <116661071+matt-heery@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:35:35 +0000 Subject: [PATCH 5/7] name prefix --- .../modules/ap_airflow_iam_role/main.tf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf index ebf0aefebe9..db83f7de2f6 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf @@ -38,7 +38,7 @@ data "aws_iam_policy_document" "oidc_assume_role_policy" { # ----------------------------- resource "aws_iam_role" "role_ap_airflow" { - name = local.role_name + name_prefix = local.role_name description = var.role_description assume_role_policy = data.aws_iam_policy_document.oidc_assume_role_policy.json force_detach_policies = true @@ -46,8 +46,8 @@ resource "aws_iam_role" "role_ap_airflow" { } resource "aws_iam_policy" "role_ap_airflow" { - name = local.role_name - policy = var.iam_policy_document + name_prefix = 
local.role_name + policy = var.iam_policy_document } resource "aws_iam_role_policy_attachment" "role_ap_airflow" { From 8104395503c3b7dc128911327a50c05c912140a6 Mon Sep 17 00:00:00 2001 From: matt-heery <116661071+matt-heery@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:41:51 +0000 Subject: [PATCH 6/7] no prefix for role name --- .../modules/ap_airflow_iam_role/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf index db83f7de2f6..651f65b4a52 100644 --- a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/main.tf @@ -46,8 +46,8 @@ resource "aws_iam_role" "role_ap_airflow" { } resource "aws_iam_policy" "role_ap_airflow" { - name_prefix = local.role_name - policy = var.iam_policy_document + name = local.role_name + policy = var.iam_policy_document } resource "aws_iam_role_policy_attachment" "role_ap_airflow" { From ea47cbd1f3b9398d6ef0095dbfd3c9d4a3f6c8f3 Mon Sep 17 00:00:00 2001 From: matt-heery <116661071+matt-heery@users.noreply.github.com> Date: Wed, 15 Jan 2025 16:46:57 +0000 Subject: [PATCH 7/7] add versions tf and remove database from name of policy --- .../electronic-monitoring-data/ap_airflow_iam.tf | 4 ++-- .../modules/ap_airflow_iam_role/versions.tf | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/versions.tf diff --git a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf index 6bb861fbe24..7932563c35c 100644 --- a/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf +++ 
b/terraform/environments/electronic-monitoring-data/ap_airflow_iam.tf @@ -225,7 +225,7 @@ module "load_unstructured_atrium_database" { data_bucket_lf_resource = aws_lakeformation_resource.data_bucket.arn de_role_arn = try(one(data.aws_iam_roles.data_engineering_roles.arns)) - name = "unstructured-atrium-database" + name = "unstructured-atrium" environment = local.environment database_name = "g4s-atrium-unstructured" path_to_data = "/load/g4s_atrium_unstructured/structure" @@ -272,4 +272,4 @@ module "load_mdss" { oidc_arn = aws_iam_openid_connect_provider.analytical_platform_compute.arn athena_dump_bucket = module.s3-athena-bucket.bucket cadt_bucket = module.s3-create-a-derived-table-bucket.bucket -} \ No newline at end of file +} diff --git a/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/versions.tf b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/versions.tf new file mode 100644 index 00000000000..2b58ee107cb --- /dev/null +++ b/terraform/environments/electronic-monitoring-data/modules/ap_airflow_iam_role/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + random = { + source = "hashicorp/random" + version = "~> 3.0" + } + } + required_version = ">= 1.0.1" +}