From f1423a86e11ee67a05c0f46a6ece2380a819c84a Mon Sep 17 00:00:00 2001 From: Colin Saliceti Date: Fri, 11 Oct 2024 17:55:59 +0100 Subject: [PATCH 1/4] New dfe_anlytics module Create resources in Google cloud Bigquery and IAM to receive events from applications --- aks/dfe_analytics/.terraform.lock.hcl | 64 ++++++++++ aks/dfe_analytics/data.tf | 11 ++ aks/dfe_analytics/files/events.json.tmpl | 145 +++++++++++++++++++++++ aks/dfe_analytics/outputs.tf | 25 ++++ aks/dfe_analytics/provider.tf | 19 +++ aks/dfe_analytics/resources.tf | 72 +++++++++++ aks/dfe_analytics/variables.tf | 88 ++++++++++++++ 7 files changed, 424 insertions(+) create mode 100644 aks/dfe_analytics/.terraform.lock.hcl create mode 100644 aks/dfe_analytics/data.tf create mode 100644 aks/dfe_analytics/files/events.json.tmpl create mode 100644 aks/dfe_analytics/outputs.tf create mode 100644 aks/dfe_analytics/provider.tf create mode 100644 aks/dfe_analytics/resources.tf create mode 100644 aks/dfe_analytics/variables.tf diff --git a/aks/dfe_analytics/.terraform.lock.hcl b/aks/dfe_analytics/.terraform.lock.hcl new file mode 100644 index 0000000..9aaf8b8 --- /dev/null +++ b/aks/dfe_analytics/.terraform.lock.hcl @@ -0,0 +1,64 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/eppo/environment" { + version = "1.3.5" + constraints = "1.3.5" + hashes = [ + "h1:1Af95/IhzW16rbX8kSApfrAi8vwc5+7uVbCeyVaGw2E=", + "zh:00e7a6bf7f0f09cc4871d7f4fee2c943ce61c05b9802365a97703d6c2e63e3dc", + "zh:018d92e621177d053ed5c32e8220efa8c019852c4d60cc7539683bac28470d9b", + "zh:12ca5162286b80b7f46bd013ae2007641132d201af12bc6adb872f9a0ff85b7a", + "zh:2991085432bd4dc718aadfb37b2cdb6201ef73a8a0e5661411f46d9ec782e678", + "zh:2a8f6801266f89b816ebfdb441411e53f4cf1e0278e853715fb561946ad5a575", + "zh:8783a8dc846d3e71b38ca470066f506dde8040f149402f0d348e5dca7f012909", + "zh:8bc8f61e496e96c81c46e1aa59bf2155b6acc80db1ea462f2ddd665748fcda7f", + "zh:95fb102fecceb3a5b44dbe9fbe262494a0abdb6805addf1286c5d92cd4b0f779", + "zh:a158837ec561c161d3c47068e30bca341e5e4c7abff7fa72b9522438b85af4ac", + "zh:a738a7b2e953ee8059f9e68d48ae954175d001a5480f29e22d717bee9fd93f7f", + "zh:bac4b3a38eed35c91269cd008ad88862f47be99474de85e9a2efcce6564e0c24", + "zh:cd56a12eef3515fa5a5845d550be2f67989c8e65563e8fa9f5060666c0728a7c", + "zh:e3e895bc8b557b36bfa03f251df429aa0fba068f4c7ef0ed6ac551b7cba9ff86", + "zh:e959a9e826e3c33242bf4492ee12e5f8be023cf2461702c43d1833c4a8516232", + "zh:f41d9d60b205e6d536881e4af7bb9fc85ae90858bfddf695f95fbd68e01e0ad3", + ] +} + +provider "registry.terraform.io/hashicorp/azurerm" { + version = "4.5.0" + hashes = [ + "h1:iIQmNl0NPEZsxS8pXTF+VGpxyfXtw5DOB4mW/kvrHy8=", + "zh:27ac12977bdb7b82217a3fe35d3206e1e4261465d738aff93244ec90f2bd431a", + "zh:36a619af3767a92ee892c5de24604eeb9f23a5a01bb8455115a5eb4bd656f234", + "zh:45a374637b794427c5e07d23c6312d92d58bed3594789322c109d333ea1865e5", + "zh:538e501d313cfc0b61f3b2e5be9ae7755df3d3d9a3e4f14e0ea6a943d5102109", + "zh:64d8e4b94a1324292fe318bf27c6149aa345eabab8b89d9d78ce447ce5600e65", + "zh:7b3fcc0a724c5e00e6ce0e7da22010b6ae4bd2622544ef4d31fd4100f85985d7", + "zh:84876a614b010ae5dbef1b1edd9a22447cf57b9300b9eaf4321d587bfebf82dc", + "zh:850e3900fb2b55ad85b6def8b580fb851778bb470be5354cb0a0244d03acd5a4", + 
"zh:b6355d1eb7d165b246ad9c8f7c0ce7ccd5bbc58a01bd853c7ca896c71f4cd295", + "zh:bd4f1558f24af356d372937b810801555471eafbbc0552471bb6760f8ddd6b7e", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:f78eaaf507ab56041112b765f6ca1740221773f3b32710bb8d087f29a686f30f", + ] +} + +provider "registry.terraform.io/hashicorp/google" { + version = "6.6.0" + constraints = "6.6.0" + hashes = [ + "h1:bNj7UyO9+IdcTbkZJgjULH89DrJSaBCRw89zt6g8ajg=", + "zh:0c181f9b9f0ab81731e5c4c2d20b6d342244506687437dad94e279ef2a588f68", + "zh:12a4c333fc0ba670e87f09eb574e4b7da90381f9929ef7c866048b6841cc8a6a", + "zh:15c277c2052df89429051350df4bccabe4cf46068433d4d8c673820d9756fc00", + "zh:35d1663c81b81cd98d768fa7b80874b48c51b27c036a3c598a597f653374d3c8", + "zh:56b268389758d544722a342da4174c486a40ffa2a49b45a06111fe31c6c9c867", + "zh:abd3ea8c3a62928ba09ba7eb42b52f53e682bd65e92d573f1739596b5a9a67b1", + "zh:be55a328d61d9db58690db74ed43614111e1105e5e52cee15acaa062df4e233e", + "zh:ce2317ce9fd02cf14323f9e061c43a415b4ae9b3f96046460d0e6b6529a5aa6c", + "zh:d54a6d8e031c824f1de21b93c3e01ed7fec134b4ae55223d08868c6168c98e47", + "zh:d8c6e33b5467c6eb5a970adb251c4c8194af12db5388cff9d4b250294eae4daa", + "zh:f49e4cc9c0b55b3bec7da64dd698298345634a5df372228ee12aa45e57982f64", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} diff --git a/aks/dfe_analytics/data.tf b/aks/dfe_analytics/data.tf new file mode 100644 index 0000000..79b122b --- /dev/null +++ b/aks/dfe_analytics/data.tf @@ -0,0 +1,11 @@ +module "cluster_data" { + source = "../cluster_data" + name = var.cluster +} + +data "azurerm_client_config" "current" {} + +data "azurerm_user_assigned_identity" "gcp_wif" { + name = "${var.azure_resource_prefix}-gcp-wif-${var.cluster}-${var.namespace}-id" + resource_group_name = module.cluster_data.configuration_map.resource_group_name +} diff --git a/aks/dfe_analytics/files/events.json.tmpl b/aks/dfe_analytics/files/events.json.tmpl new file mode 100644 index 
0000000..13ba945 --- /dev/null +++ b/aks/dfe_analytics/files/events.json.tmpl @@ -0,0 +1,145 @@ +[ + { + "description": "The timestamp at which the event occurred in the application.", + "mode": "REQUIRED", + "name": "occurred_at", + "type": "TIMESTAMP" + }, + { + "description": "The type of the event, for example web_request. This determines the schema of the data which will be included in the data field.", + "mode": "REQUIRED", + "name": "event_type", + "type": "STRING" + }, + { + "description": "If a user was logged in when they sent a web request event that is this event, then this is the UID of this user.", + "name": "user_id", + "type": "STRING" + }, + { + "description": "Unique ID of the web request, if this event is a web request event", + "name": "request_uuid", + "type": "STRING" + }, + { + "description": "Whether this web request was a GET or POST request, if this event is a web request event.", + "name": "request_method", + "type": "STRING" + }, + { + "description": "The path, starting with a / and excluding any query parameters, of this web request, if this event is a web request", + "name": "request_path", + "type": "STRING" + }, + { + "description": "The user agent of this web request, if this event is a web request. Allows a user's browser and operating system to be identified", + "name": "request_user_agent", + "type": "STRING" + }, + { + "description": "The URL of any page the user was viewing when they initiated this web request, if this event is a web request. This is the full URL, including protocol (https://) and any query parameters, if the browser shared these with our application as part of the web request. It is very common for this referer to be truncated for referrals from external sites.", + "name": "request_referer", + "type": "STRING" + }, + { + "description": "ARRAY of STRUCTs, each with a key and a value. 
Contains any query parameters that were sent to the application as part of this web request, if this event is a web request.", + "fields": [ + { + "description": "Name of the query parameter e.g. if the URL ended ?foo=bar then this will be foo.", + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "description": "Contents of the query parameter e.g. if the URL ended ?foo=bar then this will be bar.", + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "request_query", + "type": "RECORD" + }, + { + "description": "Content type of any data that was returned to the browser following this web request, if this event is a web request. For example, 'text/html; charset=utf-8'. Image views, for example, may have a non-text/html content type.", + "name": "response_content_type", + "type": "STRING" + }, + { + "description": "HTTP response code returned by the application in response to this web request, if this event is a web request. See https://developer.mozilla.org/en-US/docs/Web/HTTP/Status.", + "name": "response_status", + "type": "STRING" + }, + { + "description": "ARRAY of STRUCTs, each with a key and a value. Contains a set of data points appropriate to the event_type of this event. For example, if this event was an entity create, update, delete or import event, data will contain the values of each field in the database after this event took place - according to the settings in the analytics.yml configured for this instance of dfe-analytics. 
Values may be anonymised as a one way hash, depending on configuration settings.", + "fields": [ + { + "description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "DATA", + "type": "RECORD" + }, + { + "description": "If event_type was an entity create, update, delete or import event, the name of the table in the database that this entity is stored in. NULL otherwise.", + "name": "entity_table_name", + "type": "STRING" + }, + { + "description": "Currently left blank for future use.", + "mode": "REPEATED", + "name": "event_tags", + "type": "STRING" + }, + { + "description": "One way hash of a combination of the user's IP address and user agent, if this event is a web request. Can be used to identify the user anonymously, even when user_id is not set. Cannot be used to identify the user over a time period of longer than about a month, because of IP address changes and browser updates.", + "name": "anonymised_user_agent_and_ip", + "type": "STRING" + }, + { + "description": "The application environment that the event was streamed from.", + "name": "environment", + "type": "STRING" + }, + { + "description": "The namespace of the instance of dfe-analytics that streamed this event. 
For example this might identify the name of the service that streamed the event.", + "name": "namespace", + "type": "STRING" + }, + { + "description": "Defined in the same way as the DATA ARRAY of STRUCTs, except containing fields configured to be hidden in analytics_hidden_pii.yml", + "fields": [ + { + "description": "Name of the field in the entity_table_name table in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REQUIRED", + "name": "KEY", + "type": "STRING" + }, + { + "description": "Contents of the field in the database after it was created or updated, or just before it was imported or destroyed.", + "mode": "REPEATED", + "name": "value", + "policyTags": { + "names": [ + "${policy_tag_name}" + ] + }, + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "hidden_DATA", + "type": "RECORD" + } +] diff --git a/aks/dfe_analytics/outputs.tf b/aks/dfe_analytics/outputs.tf new file mode 100644 index 0000000..fe896ec --- /dev/null +++ b/aks/dfe_analytics/outputs.tf @@ -0,0 +1,25 @@ +output "bigquery_project_id" { + description = "ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'..." + value = var.gcp_project_id +} +output "bigquery_table_name" { + description = "Biquery events table name" + value = local.gcp_table_name +} +output "bigquery_dataset" { + description = "Bigquery dataset name" + value = local.gcp_dataset_name +} +output "google_cloud_credentials" { + description = "Credentials for Google workload identity federation" + value = local.gcp_credentials +} +output "variables_map" { + description = "Map of environment variables required for dfe-analytics. Merge with application configuration secrets." 
+ value = { + BIGQUERY_PROJECT_ID = var.gcp_project_id + BIGQUERY_TABLE_NAME = local.gcp_table_name + BIGQUERY_DATASET = local.gcp_dataset_name + GOOGLE_CLOUD_CREDENTIALS = local.gcp_credentials + } +} diff --git a/aks/dfe_analytics/provider.tf b/aks/dfe_analytics/provider.tf new file mode 100644 index 0000000..a04dda8 --- /dev/null +++ b/aks/dfe_analytics/provider.tf @@ -0,0 +1,19 @@ +terraform { + required_version = ">=1.4" + + required_providers { + google = { + source = "hashicorp/google" + version = "6.6.0" + } + azurerm = { + source = "hashicorp/azurerm" + version = ">=3" + } + } +} + +provider "google" { + project = var.gcp_project_id + region = local.gcp_region +} diff --git a/aks/dfe_analytics/resources.tf b/aks/dfe_analytics/resources.tf new file mode 100644 index 0000000..b78b8e6 --- /dev/null +++ b/aks/dfe_analytics/resources.tf @@ -0,0 +1,72 @@ +resource "google_service_account" "appender" { + account_id = "appender-${var.service_short}-${var.environment}" + display_name = "Service Account appender to ${var.service_short} in ${var.environment} environment" +} + +resource "google_service_account_iam_binding" "appender" { + service_account_id = google_service_account.appender.name + role = "roles/iam.workloadIdentityUser" + + members = [ + local.gcp_principal_with_subject + ] +} + +data "google_kms_key_ring" "main" { + count = var.gcp_keyring != null ? 1 : 0 + + name = var.gcp_keyring + location = local.gcp_region +} + +data "google_kms_crypto_key" "main" { + count = var.gcp_key != null ? 1 : 0 + + name = var.gcp_key + key_ring = data.google_kms_key_ring.main[0].id +} + +# Create dataset if it doesn't exist +resource "google_bigquery_dataset" "main" { + count = var.gcp_dataset == null ? 
1 : 0 + + dataset_id = local.gcp_dataset_name + location = local.gcp_region + default_encryption_configuration { + kms_key_name = data.google_kms_crypto_key.main[0].id + } +} + +# Add service account permission to dataset, whether we create it or it already exists +resource "google_bigquery_dataset_iam_member" "appender" { + dataset_id = var.gcp_dataset == null ? google_bigquery_dataset.main[0].dataset_id : var.gcp_dataset + role = "projects/${var.gcp_project_id}/roles/bigquery_appender_custom" + member = "serviceAccount:${google_service_account.appender.email}" +} + +# Create table if dataset doesn't exist +resource "google_bigquery_table" "events" { + count = var.gcp_dataset == null ? 1 : 0 + + dataset_id = google_bigquery_dataset.main[0].dataset_id + table_id = local.gcp_table_name + description = "Events streamed into the BigQuery from the application" + clustering = ["event_type"] + deletion_protection = var.gcp_table_deletion_protection + require_partition_filter = false + + encryption_configuration { + kms_key_name = data.google_kms_crypto_key.main[0].id + } + + time_partitioning { + type = "DAY" + field = "occurred_at" + } + + # https://github.com/DFE-Digital/dfe-analytics/blob/main/docs/create-events-table.sql + schema = templatefile( + "${path.module}/files/events.json.tmpl", + { policy_tag_name = local.gcp_policy_tag_name } + ) +} diff --git a/aks/dfe_analytics/variables.tf b/aks/dfe_analytics/variables.tf new file mode 100644 index 0000000..b16eb96 --- /dev/null +++ b/aks/dfe_analytics/variables.tf @@ -0,0 +1,88 @@ +variable "azure_resource_prefix" { + type = string + description = "Prefix of Azure resources for the service. Required" +} +variable "cluster" { + type = string + description = "AKS cluster name e.g. test, production... Required" +} +variable "namespace" { + type = string + description = "AKS Namespace where the service is deployed to. Required" +} +variable "service_short" { + type = string + description = "Short name for the service e.g. 
att, aytq... Required" +} +variable "environment" { + type = string + description = "Service environment name e.g. production, test, pr-1234... Required" +} + +variable "gcp_dataset" { + type = string + description = "Name of an existing dataset. Optional: if not provided, create a new dataset" + default = null +} +variable "gcp_keyring" { + type = string + description = "Name of an existing keyring. Required when creating the dataset" + default = null +} +variable "gcp_key" { + type = string + description = "Name of an existing customer-managed encryption key (CMEK). Required when creating the dataset" + default = null +} +variable "gcp_project_id" { + type = string + description = "ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... Required" +} +variable "gcp_project_number" { + type = number + description = "Google cloud project number. Required" +} +variable "gcp_taxonomy_id" { + type = number + description = "Policy tags taxonomy ID. Required when creating the dataset" + default = null +} +variable "gcp_policy_tag_id" { + type = number + description = "Policy tag ID. Required when creating the dataset" + default = null +} +variable "gcp_table_deletion_protection" { + type = bool + description = "Prevents deletion of the event table. Default: true" + default = true + nullable = false +} + +locals { + # Global constants + gcp_region = "europe-west2" + gcp_table_name = "events" + gcp_workload_id_pool = "azure-cip-identity-pool" + + gcp_dataset_name = var.gcp_dataset == null ? 
replace("${var.service_short}_events_${var.environment}_spike", "-", "_") : var.gcp_dataset + gcp_principal = "principal://iam.googleapis.com/projects/${var.gcp_project_number}/locations/global/workloadIdentityPools/${local.gcp_workload_id_pool}" + gcp_principal_with_subject = "${local.gcp_principal}/subject/${data.azurerm_user_assigned_identity.gcp_wif.principal_id}" + + gcp_credentials_map = { + universe_domain = "googleapis.com" + type = "external_account" + audience = "//iam.googleapis.com/projects/${var.gcp_project_number}/locations/global/workloadIdentityPools/azure-cip-identity-pool/providers/azure-cip-oidc-provider" + subject_token_type = "urn:ietf:params:oauth:token-type:jwt" + token_url = "https://sts.googleapis.com/v1/token" + credential_source = { + url = "https://login.microsoftonline.com/${data.azurerm_client_config.current.tenant_id}/oauth2/v2.0/token" + } + service_account_impersonation_url = "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/${google_service_account.appender.email}:generateAccessToken" + service_account_impersonation = { + token_lifetime_seconds = 3600 + } + } + gcp_credentials = jsonencode(local.gcp_credentials_map) + gcp_policy_tag_name = var.gcp_dataset == null ? 
"projects/${var.gcp_project_id}/locations/${local.gcp_region}/taxonomies/${var.gcp_taxonomy_id}/policyTags/${var.gcp_policy_tag_id}" : "" +} From 952475da808e1cf312f5d4968fec3dcbb01c3bc6 Mon Sep 17 00:00:00 2001 From: Colin Saliceti Date: Tue, 15 Oct 2024 19:07:39 +0100 Subject: [PATCH 2/4] GCP IAM configuration for Github actions Authorise Github action workflow to run terraform in GCP --- aks/dfe_analytics/authorise_workflow.sh | 89 +++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 aks/dfe_analytics/authorise_workflow.sh diff --git a/aks/dfe_analytics/authorise_workflow.sh b/aks/dfe_analytics/authorise_workflow.sh new file mode 100644 index 0000000..056240e --- /dev/null +++ b/aks/dfe_analytics/authorise_workflow.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# Set up Direct Workload Identity Federation +# See https://github.com/google-github-actions/auth?tab=readme-ov-file#preferred-direct-workload-identity-federation + +PROJECT_ID=$1 +REPO=$2 + +if [[ -z "$PROJECT_ID" || -z "$REPO" ]]; then + cat < Date: Fri, 18 Oct 2024 11:53:32 +0100 Subject: [PATCH 3/4] dfe_anlytics README and tfdocs --- aks/dfe_analytics/README.md | 119 ++++++++++++++++++++++++++++++++++++ aks/dfe_analytics/tfdocs.md | 62 +++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 aks/dfe_analytics/README.md create mode 100644 aks/dfe_analytics/tfdocs.md diff --git a/aks/dfe_analytics/README.md b/aks/dfe_analytics/README.md new file mode 100644 index 0000000..102ef56 --- /dev/null +++ b/aks/dfe_analytics/README.md @@ -0,0 +1,119 @@ +# DfE Analytics +Create resources in Google cloud Bigquery and provides the required variables to applications so they can send events. 
+ +## Examples +### Reuse existing dataset and events table + +```hcl +module "dfe_analytics" { + source = "./vendor/modules/dfe-terraform-modules//aks/dfe_analytics" + + azure_resource_prefix = var.azure_resource_prefix + cluster = var.cluster + namespace = var.namespace + service_short = var.service_short + environment = var.environment + gcp_dataset = "events_${var.config}" + gcp_project_id = "apply-for-qts-in-england" + gcp_project_number = 385922361840 +} +``` + +### Create new dataset and events table +Use for a new environment. To get the values for `gcp_taxonomy_id` and `gcp_policy_tag_id` see [Taxonomy and policy tag](#taxonomy-and-policy-tag). +```hcl +module "dfe_analytics" { + source = "./vendor/modules/dfe-terraform-modules//aks/dfe_analytics" + + azure_resource_prefix = var.azure_resource_prefix + cluster = var.cluster + namespace = var.namespace + service_short = var.service_short + environment = var.environment + gcp_keyring = "afqts-key-ring" + gcp_key = "afqts-key" + gcp_project_id = "apply-for-qts-in-england" + gcp_project_number = 385922361840 + gcp_taxonomy_id = 5456044749211275650 + gcp_policy_tag_id = 2399328962407973209 +} +``` + +### Configure application +#### Enable in Ruby +```ruby +DfE::Analytics.configure do |config| +... + config.azure_federated_auth = ENV.include? "GOOGLE_CLOUD_CREDENTIALS" +end +``` + +#### Enable in .NET +TBD + +#### Variables +Each variable is available as a separate output. For convenience, the `variables_map` output provides them all: +- BIGQUERY_PROJECT_ID +- BIGQUERY_TABLE_NAME +- BIGQUERY_DATASET +- GOOGLE_CLOUD_CREDENTIALS + +```hcl +module "application_configuration" { + source = "./vendor/modules/dfe-terraform-modules//aks/application_configuration" + ... + secret_variables = merge( + module.dfe_analytics.variables_map, + { + ... + } + ) +} +``` + +#### Enable on each app that requires it +```hcl +module "worker_application" { + source = "./vendor/modules/dfe-terraform-modules//aks/application" + ... 
+ enable_gcp_wif = true +} +``` + +## Authentication - Command line +The user should have Owner role on the Google project. + +- Run `gcloud auth application-default login` +- Run terraform + +## Authentication - Github actions +We set up workload identity federation on the Google side and configure the workflow. The user should have Owner role on the Google project. This is done once per repository. + +- Run the `authorise_workflow.sh` located in *aks/dfe_analytics*: + ``` + ./authorise_workflow.sh PROJECT_ID REPO + ``` + Example: + ``` + ./authorise_workflow.sh apply-for-qts-in-england apply-for-qualified-teacher-status + ``` +- The script shows the *permissions* and *google-github-actions/auth step* to add to the workflow job +- Adding the permission removes the [default token permissions](https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#permissions-for-the-github_token), which may be an issue for some actions that rely on them. For example, the [marocchino/sticky-pull-request-comment](https://github.com/marocchino/sticky-pull-request-comment) action requires `pull-requests: write`. It must then be added explicitly. +- Run the workflow + +## Taxonomy and policy tag +The user should have Owner role on the Google project. + +- Authenticate: `gcloud auth application-default login` +- Get projects list: `gcloud projects list` +- Select project e.g.: `gcloud config set project apply-for-qts-in-england` +- Get taxonomies list: + ``` + gcloud data-catalog taxonomies list --location=europe-west2 --format="value(name)" + ``` + The path contains the taxonomy id as a number e.g. 
5456044749211275650 +- Get policy tags e.g.: + ``` + gcloud data-catalog taxonomies policy-tags list --taxonomy="projects/apply-for-qts-in-england/locations/europe-west2/taxonomies/5456044749211275650" --location="europe-west2" --filter="displayName:hidden" --format="value(name)" + ``` + The path contains the policy tag id as a number e.g. 2399328962407973209 diff --git a/aks/dfe_analytics/tfdocs.md b/aks/dfe_analytics/tfdocs.md new file mode 100644 index 0000000..b49280d --- /dev/null +++ b/aks/dfe_analytics/tfdocs.md @@ -0,0 +1,62 @@ +## Requirements + +| Name | Version | +|------|---------| +| [terraform](#requirement\_terraform) | >=1.4 | +| [azurerm](#requirement\_azurerm) | >=3 | +| [google](#requirement\_google) | 6.6.0 | + +## Providers + +| Name | Version | +|------|---------| +| [azurerm](#provider\_azurerm) | 4.5.0 | +| [google](#provider\_google) | 6.6.0 | + +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [cluster\_data](#module\_cluster\_data) | ../cluster_data | n/a | + +## Resources + +| Name | Type | +|------|------| +| [google_bigquery_dataset.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset) | resource | +| [google_bigquery_dataset_iam_member.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_dataset_iam_member) | resource | +| [google_bigquery_table.events](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/bigquery_table) | resource | +| [google_service_account.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account) | resource | +| [google_service_account_iam_binding.appender](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/resources/service_account_iam_binding) | resource | +| [azurerm_client_config.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/client_config) | data source | +| 
[azurerm_user_assigned_identity.gcp_wif](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/user_assigned_identity) | data source | +| [google_kms_crypto_key.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/data-sources/kms_crypto_key) | data source | +| [google_kms_key_ring.main](https://registry.terraform.io/providers/hashicorp/google/6.6.0/docs/data-sources/kms_key_ring) | data source | + +## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [azure\_resource\_prefix](#input\_azure\_resource\_prefix) | Prefix of Azure resources for the service. Required | `string` | n/a | yes | +| [cluster](#input\_cluster) | AKS cluster name e.g. test, production... Required | `string` | n/a | yes | +| [environment](#input\_environment) | Service environment name e.g. production, test, pr-1234... Required | `string` | n/a | yes | +| [gcp\_dataset](#input\_gcp\_dataset) | Name of an existing dataset. Optional: if not provided, create a new dataset | `string` | `null` | no | +| [gcp\_key](#input\_gcp\_key) | Name of an existing customer-managed encryption key (CMEK). Required when creating the dataset | `string` | `null` | no | +| [gcp\_keyring](#input\_gcp\_keyring) | Name of an existing keyring. Required when creating the dataset | `string` | `null` | no | +| [gcp\_policy\_tag\_id](#input\_gcp\_policy\_tag\_id) | Policy tag ID. Required when creating the dataset | `number` | `null` | no | +| [gcp\_project\_id](#input\_gcp\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... Required | `string` | n/a | yes | +| [gcp\_project\_number](#input\_gcp\_project\_number) | Google cloud project number. Required | `number` | n/a | yes | +| [gcp\_table\_deletion\_protection](#input\_gcp\_table\_deletion\_protection) | Prevents deletion of the event table. 
Default: true | `bool` | `true` | no | +| [gcp\_taxonomy\_id](#input\_gcp\_taxonomy\_id) | Policy tags taxonomy ID. Required when creating the dataset | `number` | `null` | no | +| [namespace](#input\_namespace) | AKS Namespace where the service is deployed to. Required | `string` | n/a | yes | +| [service\_short](#input\_service\_short) | Short name for the service e.g. att, aytq... Required | `string` | n/a | yes | + +## Outputs + +| Name | Description | +|------|-------------| +| [bigquery\_dataset](#output\_bigquery\_dataset) | Bigquery dataset name | +| [bigquery\_project\_id](#output\_bigquery\_project\_id) | ID of the Google cloud project e.g. 'rugged-abacus-218110', 'apply-for-qts-in-england'... | +| [bigquery\_table\_name](#output\_bigquery\_table\_name) | Biquery events table name | +| [google\_cloud\_credentials](#output\_google\_cloud\_credentials) | Credentials for Google workload identity federation | +| [variables\_map](#output\_variables\_map) | Map of environment variables required for dfe-analytics. Merge with application configuration secrets. | From 0fcc18468810f848e7158e179b161007ed5c952b Mon Sep 17 00:00:00 2001 From: James Gunn Date: Mon, 21 Oct 2024 15:15:32 +0100 Subject: [PATCH 4/4] Add .NET setup instructions for DfE Analytics --- aks/dfe_analytics/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aks/dfe_analytics/README.md b/aks/dfe_analytics/README.md index 102ef56..4129914 100644 --- a/aks/dfe_analytics/README.md +++ b/aks/dfe_analytics/README.md @@ -49,7 +49,11 @@ end ``` #### Enable in .NET -TBD +```cs +builder.Services.AddDfeAnalytics() + .UseFederatedAksBigQueryClientProvider(); +``` +Ensure the `ProjectNumber`, `WorkloadIdentityPoolName`, `WorkloadIdentityPoolProviderName` and `ServiceAccountEmail` configuration keys are populated within the `DfeAnalytics` configuration section. #### Variables Each variable is available as a separate output. 
For convenience, the `variables_map` output provides them all: