diff --git a/containers/jml-extract-lambda/CHANGELOG.md b/containers/jml-extract-lambda/CHANGELOG.md new file mode 100644 index 0000000000..bba61207ca --- /dev/null +++ b/containers/jml-extract-lambda/CHANGELOG.md @@ -0,0 +1,22 @@ + + +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +## [1.0.1] - 2024-01-17 + +### Changed + +- Updated ECR repository name + +## [1.0.0] - 2023-09-12 + +### Added + +- initial implementation of lambda function for extracting JML data on a schedule diff --git a/containers/jml-extract-lambda/Dockerfile b/containers/jml-extract-lambda/Dockerfile new file mode 100644 index 0000000000..72674b85e2 --- /dev/null +++ b/containers/jml-extract-lambda/Dockerfile @@ -0,0 +1,12 @@ +FROM public.ecr.aws/lambda/python:3.11 + +ARG VERSION + +ENV BASE_VERSION="${VERSION}" + +COPY src/var/task ${LAMBDA_TASK_ROOT} + +RUN python -m pip install --no-cache-dir --upgrade pip==23.3.2 \ + && python -m pip install --no-cache-dir --requirement requirements.txt + +CMD ["handler.handler"] diff --git a/containers/jml-extract-lambda/config.json b/containers/jml-extract-lambda/config.json new file mode 100644 index 0000000000..f7f41b8a9c --- /dev/null +++ b/containers/jml-extract-lambda/config.json @@ -0,0 +1,11 @@ +{ + "name": "jml-extract-lambda", + "version": "1.0.1", + "registry": "ecr", + "ecr": { + "role": "arn:aws:iam::013433889002:role/modernisation-platform-oidc-cicd", + "account": "374269020027", + "region": "eu-west-2", + "repository": "data-platform-jml-extract-lambda-ecr-repo" + } +} diff --git a/containers/jml-extract-lambda/src/var/task/handler.py b/containers/jml-extract-lambda/src/var/task/handler.py new file mode 100644 index 0000000000..b2a1868e3e --- /dev/null +++ b/containers/jml-extract-lambda/src/var/task/handler.py @@ 
import os
from datetime import datetime as dt
from urllib.error import HTTPError

import awswrangler as wr
import boto3
import pandas as pd
from notifications_python_client import prepare_upload
from notifications_python_client.errors import HTTPError as NotifyHTTPError
from notifications_python_client.notifications import NotificationsAPIClient


def _previous_month_window(now):
    """Return ``(start, end)`` bounding the calendar month before *now*.

    ``start`` is midnight on the first day of the previous month and ``end``
    is midnight on the first day of the month containing *now*.
    """
    end = dt(now.year, now.month, 1)
    if now.month == 1:
        # January: the previous month is December of the preceding year.
        start = dt(now.year - 1, 12, 1)
    else:
        start = dt(now.year, now.month - 1, 1)
    return start, end


def _parse_log_group_names(raw):
    """Split a comma-separated environment value into a list of names."""
    return [name.strip() for name in raw.split(",") if name.strip()]


def handler(event, context):
    """Query last month's logs and email the result as an Excel attachment.

    Reads its configuration from the environment:

    * ``SECRET_ID`` - Secrets Manager id holding the notifications API key.
    * ``EMAIL_SECRET`` - Secrets Manager id holding the recipient address.
    * ``LOG_GROUP_NAMES`` - one log group name, or several separated by commas.
    * ``TEMPLATE_ID`` - id of the email template to send.

    Args:
        event: Lambda invocation event (unused).
        context: Lambda invocation context (unused).

    Raises:
        KeyError: if a required environment variable is missing.
        ValueError: if ``LOG_GROUP_NAMES`` contains no log group name.
        HTTPError: re-raised after logging when sending the email fails.
    """
    secret_id = os.environ["SECRET_ID"]
    email_secret = os.environ["EMAIL_SECRET"]
    template_id = os.environ["TEMPLATE_ID"]

    # Environment variables are always strings, whereas the log reader takes
    # a list of names; a single name still yields a one-element list.
    log_group_names = _parse_log_group_names(os.environ["LOG_GROUP_NAMES"])
    if not log_group_names:
        raise ValueError(
            "LOG_GROUP_NAMES must contain at least one log group name"
        )

    secrets_client = boto3.client("secretsmanager")
    api_key = secrets_client.get_secret_value(SecretId=secret_id)["SecretString"]
    email_address = secrets_client.get_secret_value(SecretId=email_secret)[
        "SecretString"
    ]
    notifications_client = NotificationsAPIClient(api_key)

    # Take one clock snapshot so the report date and the query window cannot
    # disagree if the invocation straddles a midnight or month boundary.
    # NOTE(review): the datetime is naive, so it follows the runtime's local
    # timezone - confirm that is the intended reference.
    now = dt.now()
    current_date = now.strftime("%Y/%m/%d")
    start_datetime, end_datetime = _previous_month_window(now)

    # Query
    query = f"""fields detail.data.user_id as `User`, detail.data.user_name as `Employee Email Address`
    | filter detail.data.type = "s"
    | filter detail.data.connection = "github"
    | stats max(@timestamp) as `Last login date` by "{current_date}" as `Effective Date of Data`, \
    `User`, `Employee Email Address`
    | sort `Last login date` desc
    """

    # Read logs
    dataframe = wr.cloudwatch.read_logs(
        log_group_names=log_group_names,
        query=query,
        start_time=start_datetime,
        end_time=end_datetime,
    )

    # Reformat the login timestamp as a plain date. The guard covers a query
    # that matched nothing, which presumably comes back without the column -
    # in that case an empty sheet is still sent rather than raising KeyError.
    if "Last login date" in dataframe.columns:
        dataframe["Last login date"] = pd.to_datetime(
            dataframe["Last login date"], format="%Y-%m-%d %H:%M:%S.%f"
        ).dt.strftime("%Y/%m/%d")

    # Save to Excel
    datestamp = end_datetime.strftime("%Y_%m_%d")
    excel_filename = f"/tmp/jml_extract_{datestamp}.xlsx"
    dataframe.to_excel(excel_filename, index=False, sheet_name="Data")

    # Send email notification
    with open(excel_filename, "rb") as f:
        try:
            notifications_client.send_email_notification(
                email_address=email_address,
                template_id=template_id,
                personalisation={
                    "date": current_date,
                    "link_to_file": prepare_upload(f),
                },
            )
        except (HTTPError, NotifyHTTPError) as error:
            # The notifications client raises its own HTTPError class, not
            # urllib's; catch both so the failure is logged before it
            # propagates and fails the invocation.
            print(error)
            raise


# Pinned runtime dependencies shipped next to this handler in
# containers/jml-extract-lambda/src/var/task/requirements.txt:
#   awswrangler==3.5.1
#   boto3==1.34.19
#   botocore==1.34.19
#   pandas==2.1.4
#   notifications-python-client==9.0.0
#   openpyxl==3.1.2