Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new relic monitoring #138

Merged
merged 6 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ RUN apt-get -y update && apt-get -y --no-install-recommends install \
make gcc libc-dev libgeos-dev musl-dev libpq-dev libffi-dev

RUN pip install --upgrade pip && pip install pipenv==v2022.11.30
RUN pip install newrelic

COPY Pipfile Pipfile
COPY Pipfile.lock Pipfile.lock
Expand All @@ -29,4 +30,11 @@ RUN apt-get clean \

COPY ./app /app/app
COPY wait_for_postgres.sh /usr/local/bin/wait_for_postgres.sh
RUN chmod +x /usr/local/bin/wait_for_postgres.sh
COPY app/settings/start.sh /app/start.sh
COPY newrelic.ini /app/newrelic.ini

RUN chmod +x /usr/local/bin/wait_for_postgres.sh

RUN chmod +x /app/start.sh

ENTRYPOINT [ "/app/start.sh" ]
47 changes: 47 additions & 0 deletions app/settings/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#! /usr/bin/env sh
# Container entrypoint: resolve the ASGI app module and gunicorn config,
# run an optional prestart hook, extract the New Relic license key from the
# injected secret, then exec gunicorn (wrapped by newrelic-admin in
# staging/production so the New Relic agent is attached).
set -e

# Locate the application module (prefer app.main, fall back to main).
if [ -f /app/app/main.py ]; then
    DEFAULT_MODULE_NAME=app.main
elif [ -f /app/main.py ]; then
    DEFAULT_MODULE_NAME=main
fi
MODULE_NAME=${MODULE_NAME:-$DEFAULT_MODULE_NAME}
VARIABLE_NAME=${VARIABLE_NAME:-app}
export APP_MODULE=${APP_MODULE:-"$MODULE_NAME:$VARIABLE_NAME"}

# Locate the gunicorn configuration file.
if [ -f /app/gunicorn_conf.py ]; then
    DEFAULT_GUNICORN_CONF=/app/gunicorn_conf.py
elif [ -f /app/app/gunicorn_conf.py ]; then
    DEFAULT_GUNICORN_CONF=/app/app/gunicorn_conf.py
else
    DEFAULT_GUNICORN_CONF=/gunicorn_conf.py
fi
export GUNICORN_CONF=${GUNICORN_CONF:-$DEFAULT_GUNICORN_CONF}
export WORKER_CLASS=${WORKER_CLASS:-"uvicorn.workers.UvicornWorker"}

# If there's a prestart.sh script in the /app directory or other path specified, run it before starting
PRE_START_PATH=${PRE_START_PATH:-/app/prestart.sh}
echo "Checking for script in $PRE_START_PATH"
if [ -f "$PRE_START_PATH" ]; then    # quoted: path may contain spaces
    echo "Running script $PRE_START_PATH"
    . "$PRE_START_PATH"
else
    echo "There is no script $PRE_START_PATH"
fi

# NEW_RELIC_LICENSE_KEY arrives as the raw Secrets Manager JSON payload
# (e.g. {"license_key": "..."}); replace it with the bare key value.
# A single jq pass with fromjson + -r replaces the old jq | jq | sed chain.
# Assignment is kept separate from export so a jq failure is not masked
# under `set -e`.
NEW_RELIC_LICENSE_KEY=$(jq -nr 'env.NEW_RELIC_LICENSE_KEY | fromjson | .license_key')
export NEW_RELIC_LICENSE_KEY
NEW_RELIC_CONFIG_FILE=/app/newrelic.ini
export NEW_RELIC_CONFIG_FILE

# Staging and production run under the New Relic wrapper (the only
# difference between them is NEW_RELIC_ENVIRONMENT, so one arm serves both);
# every other environment starts plain gunicorn without monitoring.
case "${ENV}" in
    staging|production)
        export NEW_RELIC_ENVIRONMENT="${ENV}"
        # Start Gunicorn
        exec newrelic-admin run-program gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE"
        ;;
    *)
        exec gunicorn -k "$WORKER_CLASS" -c "$GUNICORN_CONF" "$APP_MODULE"
        ;;
esac
2 changes: 1 addition & 1 deletion docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ services:
- ENV=production
ports:
- 8088:80
entrypoint: wait_for_postgres.sh /start.sh
entrypoint: wait_for_postgres.sh /app/start.sh
depends_on:
- database

Expand Down
212 changes: 212 additions & 0 deletions newrelic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# ---------------------------------------------------------------------------

#
# This file configures the New Relic Python Agent.
#
# The path to the configuration file should be supplied to the function
# newrelic.agent.initialize() when the agent is being initialized.
#
# The configuration file follows a structure similar to what you would
# find for Microsoft Windows INI files. For further information on the
# configuration file format see the Python ConfigParser documentation at:
#
# http://docs.python.org/library/configparser.html
#
# For further discussion on the behaviour of the Python agent that can
# be configured via this configuration file see:
#
# https://docs.newrelic.com/docs/apm/agents/python-agent/configuration/python-agent-configuration/
#

# ---------------------------------------------------------------------------

# Here are the settings that are common to all environments.

[newrelic]

# You must specify the license key associated with your New
# Relic account. This key binds the Python Agent's data to your
# account in the New Relic service.
# license_key = <license_key>

# The application name. Set this to be the name of your
# application as you would like it to show up in New Relic UI.
# The UI will then auto-map instances of your application into an
# entry on your home dashboard page.
app_name = GFW Tile Cache Service

# New Relic offers distributed tracing for monitoring and analyzing modern
# distributed systems. Enable distributed tracing here.
# distributed_tracing.enabled = false

# When "true", the agent collects performance data about your
# application and reports this data to the New Relic UI at
# newrelic.com. This global switch is normally overridden for
# each environment below.
monitor_mode = true

# Sets the name of a file to log agent messages to. Useful for
# debugging any issues with the agent. This is not set by
# default as it is not known in advance what user your web
# application processes will run as and where they have
# permission to write to. Whatever you set this to you must
# ensure that the permissions for the containing directory and
# the file itself are correct, and that the user that your web
# application runs as can write to the file. If not able to
# write out a log file, it is also possible to say "stderr" and
# output to standard error output. This would normally result in
# output appearing in your web server log.
#log_file = /tmp/newrelic-python-agent.log

# Sets the level of detail of messages sent to the log file, if
# a log file location has been provided. Possible values, in
# increasing order of detail, are: "critical", "error", "warning",
# "info" and "debug". When reporting any agent issues to New
# Relic technical support, the most useful setting for the
# support engineers is "debug". However, this can generate a lot
# of information very quickly, so it is best not to keep the
# agent at this level for longer than it takes to reproduce the
# problem you are experiencing.
log_level = info

# The Python Agent communicates with the New Relic service using
# SSL by default. Note that this does result in an increase in
# CPU overhead, over and above what would occur for a non SSL
# connection, to perform the encryption involved in the SSL
# communication. This work is though done in a distinct thread
# to those handling your web requests, so it should not impact
# response times. You can if you wish revert to using a non SSL
# connection, but this will result in information being sent
# over a plain socket connection and will not be as secure.
ssl = true

# High Security Mode enforces certain security settings, and
# prevents them from being overridden, so that no sensitive data
# is sent to New Relic. Enabling High Security Mode means that
# SSL is turned on, request parameters are not collected, and SQL
# can not be sent to New Relic in its raw form. To activate High
# Security Mode, it must be set to 'true' in this local .ini
# configuration file AND be set to 'true' in the server-side
# configuration in the New Relic user interface. For details, see
# https://docs.newrelic.com/docs/subscriptions/high-security
high_security = false

# The Python Agent will attempt to connect directly to the New
# Relic service. If there is an intermediate firewall between
# your host and the New Relic service that requires you to use a
# HTTP proxy, then you should set both the "proxy_host" and
# "proxy_port" settings to the required values for the HTTP
# proxy. The "proxy_user" and "proxy_pass" settings should
# additionally be set if proxy authentication is implemented by
# the HTTP proxy. The "proxy_scheme" setting dictates what
# protocol scheme is used in talking to the HTTP proxy. This
# would normally always be set as "http" which will result in the
# agent then using a SSL tunnel through the HTTP proxy for end to
# end encryption.
# proxy_scheme = http
# proxy_host = hostname
# proxy_port = 8080
# proxy_user =
# proxy_pass =

# Capturing request parameters is off by default. To enable the
# capturing of request parameters, first ensure that the setting
# "attributes.enabled" is set to "true" (the default value), and
# then add "request.parameters.*" to the "attributes.include"
# setting. For details about attributes configuration, please
# consult the documentation.
attributes.include = request.parameters.*

# The transaction tracer captures deep information about slow
# transactions and sends this to the UI on a periodic basis. The
# transaction tracer is enabled by default. Set this to "false"
# to turn it off.
transaction_tracer.enabled = true

# Threshold in seconds for when to collect a transaction trace.
# When the response time of a controller action exceeds this
# threshold, a transaction trace will be recorded and sent to
# the UI. Valid values are any positive float value, or (default)
# "apdex_f", which will use the threshold for a dissatisfying
# Apdex controller action - four times the Apdex T value.
transaction_tracer.transaction_threshold = apdex_f

# When the transaction tracer is on, SQL statements can
# optionally be recorded. The recorder has three modes, "off"
# which sends no SQL, "raw" which sends the SQL statement in its
# original form, and "obfuscated", which strips out numeric and
# string literals.
transaction_tracer.record_sql = obfuscated

# Threshold in seconds for when to collect stack trace for a SQL
# call. In other words, when SQL statements exceed this
# threshold, then capture and send to the UI the current stack
# trace. This is helpful for pinpointing where long SQL calls
# originate from in an application.
transaction_tracer.stack_trace_threshold = 0.5

# Determines whether the agent will capture query plans for slow
# SQL queries. Only supported in MySQL and PostgreSQL. Set this
# to "false" to turn it off.
transaction_tracer.explain_enabled = true

# Threshold for query execution time below which query plans
# will not be captured. Relevant only when "explain_enabled"
# is true.
transaction_tracer.explain_threshold = 0.5

# Space separated list of function or method names in form
# 'module:function' or 'module:class.function' for which
# additional function timing instrumentation will be added.
transaction_tracer.function_trace =

# The error collector captures information about uncaught
# exceptions or logged exceptions and sends them to UI for
# viewing. The error collector is enabled by default. Set this
# to "false" to turn it off.
error_collector.enabled = true

# To stop specific errors from reporting to the UI, set this to
# a space separated list of the Python exception type names to
# ignore. The exception name should be of the form 'module:class'.
error_collector.ignore_errors =

# Browser monitoring is the Real User Monitoring feature of the UI.
# For those Python web frameworks that are supported, this
# setting enables the auto-insertion of the browser monitoring
# JavaScript fragments.
browser_monitoring.auto_instrument = true

# A thread profiling session can be scheduled via the UI when
# this option is enabled. The thread profiler will periodically
# capture a snapshot of the call stack for each active thread in
# the application to construct a statistically representative
# call tree.
thread_profiler.enabled = true

# ---------------------------------------------------------------------------

#
# The application environments. These are specific settings which
# override the common environment settings. The settings related to a
# specific environment will be used when the environment argument to the
# newrelic.agent.initialize() function has been defined to be either
# "development", "test", "staging" or "production".
#

[newrelic:development]
monitor_mode = false

[newrelic:test]
monitor_mode = false

[newrelic:staging]
app_name = GFW Tile Cache Service (Staging)
monitor_mode = true
distributed_tracing.enabled = false

[newrelic:production]
monitor_mode = true
distributed_tracing.enabled = true

# ---------------------------------------------------------------------------
13 changes: 13 additions & 0 deletions terraform/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@ data "template_file" "container_definition" {
tile_cache_url = local.tile_cache_url
raster_tiler_lambda_name = module.lambda_raster_tiler.lambda_name
tiles_bucket_name = module.storage.tiles_bucket_name
new_relic_license_key_arn = data.aws_secretsmanager_secret.newrelic_license.arn
data_lake_bucket_name = local.data_lake_bucket_name
}
}

# Look up the pre-existing Secrets Manager secret that stores the New Relic
# license key (name supplied via var.newrelic_license_key_secret); its ARN is
# injected into the container definition as the NEW_RELIC_LICENSE_KEY secret.
data "aws_secretsmanager_secret" "newrelic_license" {
name = var.newrelic_license_key_secret
}

# Policy document granting read access to that one secret only, so the ECS
# task execution role can resolve it when the container starts.
data "aws_iam_policy_document" "read_new_relic_lic" {
statement {
actions = ["secretsmanager:GetSecretValue"]
resources = [data.aws_secretsmanager_secret.newrelic_license.arn]
effect = "Allow"
}
}
8 changes: 7 additions & 1 deletion terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ module "orchestration" {
task_execution_role_policies = [
data.terraform_remote_state.core.outputs.secrets_postgresql-reader_policy_arn,
data.terraform_remote_state.core.outputs.secrets_planet_api_key_policy_arn,
data.terraform_remote_state.core.outputs.secrets_read-gfw-api-token_policy_arn
data.terraform_remote_state.core.outputs.secrets_read-gfw-api-token_policy_arn,
aws_iam_policy.read_new_relic_secret.arn
]
container_definition = data.template_file.container_definition.rendered
}
Expand Down Expand Up @@ -98,4 +99,9 @@ module "lambda_raster_tiler" {
tags = local.tags
data_lake_bucket_name = local.data_lake_bucket_name
tile_cache_url = local.tile_cache_url
}

# IAM policy wrapping read_new_relic_lic; attached to the ECS task execution
# role via task_execution_role_policies above. Name is truncated to AWS's
# 64-character IAM policy-name limit.
# NOTE(review): the name mixes separators ("read_new-relic_secret") — likely a
# typo, but renaming would recreate the deployed policy; confirm before fixing.
resource "aws_iam_policy" "read_new_relic_secret" {
name = substr("${local.project}-read_new-relic_secret${local.name_suffix}", 0, 64)
policy = data.aws_iam_policy_document.read_new_relic_lic.json
}
2 changes: 1 addition & 1 deletion terraform/modules/storage/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,4 @@ resource "aws_iam_policy" "s3_update_bucket_policy" {
name = "${var.project}-s3_update_bucket_policy${var.name_suffix}"
policy = data.template_file.s3_update_bucket_policy.rendered

}
}
5 changes: 4 additions & 1 deletion terraform/templates/container_definition.json.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@
{
"name": "TOKEN_SECRET",
"valueFrom": "${token_secret_arn}"
},
{
"name": "NEW_RELIC_LICENSE_KEY",
"valueFrom": "${new_relic_license_key_arn}"
}

],
"portMappings": [
{
Expand Down
6 changes: 6 additions & 0 deletions terraform/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ variable "lambda_runtime" {
default = "python3.10"
}

variable "newrelic_license_key_secret" {
type = string
description = "New Relic license key secret name"
default = "newrelic/license_key"
}

variable "data_lake_bucket_name" {
description = "Data Lake S3 bucket name"
default = ""
Expand Down
2 changes: 1 addition & 1 deletion terraform/vars/terraform-production.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ auto_scaling_min_capacity = 2
auto_scaling_max_capacity = 15
fargate_cpu = 8192
fargate_memory = 16384
data_lake_bucket_name = "gfw-data-lake"
data_lake_bucket_name = "gfw-data-lake"
2 changes: 1 addition & 1 deletion terraform/vars/terraform-staging.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ auto_scaling_min_capacity = 1
auto_scaling_max_capacity = 15
fargate_cpu = 8192
fargate_memory = 16384
data_lake_bucket_name = "gfw-data-lake-staging"
data_lake_bucket_name = "gfw-data-lake-staging"
Loading