Skip to content

Commit

Permalink
Config class and S3 clients (#6)
Browse files Browse the repository at this point in the history
* Add Config class

Why these changes are being introduced:
* The application needs a class to manage config values and a stream variable to store the application logs for an email sent at the end of each run.

How this addresses that need:
* Add Config class with corresponding unit tests and fixtures for the methods
* Update cli.py with calls to Config methods and stream variable
* Update README.md
* Update pyproject.toml

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/IN-1108

* Add S3 client

Why these changes are being introduced:
* An S3 client is needed for submission metadata and content files

How this addresses that need:
* Add S3Client class with corresponding unit tests and fixtures for the methods
* Update dependencies

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/IN-1099

* Updates based on discussion in PR #6

* Refactor Config.configure_logger method
* Rename retrieve_file_type_from_bucket > get_files_iter and refactor method
  • Loading branch information
ehanson8 authored Dec 6, 2024
1 parent e13bbb1 commit 8eb29e2
Show file tree
Hide file tree
Showing 10 changed files with 787 additions and 145 deletions.
3 changes: 3 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ verify_ssl = true
name = "pypi"

[packages]
boto3 = "*"
boto3-stubs = {extras = ["essential","ses"], version = "*"}
click = "*"
sentry-sdk = "*"

[dev-packages]
black = "*"
coveralls = "*"
moto = "*"
mypy = "*"
pre-commit = "*"
pytest = "*"
Expand Down
604 changes: 509 additions & 95 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@ Description of the app
```shell
SENTRY_DSN=### If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development.
WORKSPACE=### Set to `dev` for local development; this will be set to `stage` and `prod` in those environments by Terraform.
AWS_REGION_NAME=### Default AWS region.
```

### Optional

```shell
LOG_LEVEL=### Logging level. Defaults to 'INFO'.
```


10 changes: 7 additions & 3 deletions dsc/cli.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
from datetime import timedelta
from io import StringIO
from time import perf_counter

import click

from dsc.config import configure_logger, configure_sentry
from dsc.config import Config

logger = logging.getLogger(__name__)
CONFIG = Config()


@click.command()
Expand All @@ -15,9 +17,11 @@
)
def main(*, verbose: bool) -> None:
start_time = perf_counter()
stream = StringIO()
root_logger = logging.getLogger()
logger.info(configure_logger(root_logger, verbose=verbose))
logger.info(configure_sentry())
logger.info(CONFIG.configure_logger(root_logger, stream, verbose=verbose))
logger.info(CONFIG.configure_sentry())
CONFIG.check_required_env_vars()
logger.info("Running process")

# Do things here!
Expand Down
82 changes: 56 additions & 26 deletions dsc/config.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,63 @@
import logging
import os
from collections.abc import Iterable
from io import StringIO

import sentry_sdk


def configure_logger(logger: logging.Logger, *, verbose: bool) -> str:
if verbose:
logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s.%(funcName)s() line %(lineno)d: "
"%(message)s"
)
logger.setLevel(logging.DEBUG)
for handler in logging.root.handlers:
handler.addFilter(logging.Filter("dsc"))
else:
logging.basicConfig(
format="%(asctime)s %(levelname)s %(name)s.%(funcName)s(): %(message)s"
class Config:
REQUIRED_ENV_VARS: Iterable[str] = [
"WORKSPACE",
"SENTRY_DSN",
"AWS_REGION_NAME",
]

OPTIONAL_ENV_VARS: Iterable[str] = ["LOG_LEVEL"]

def __getattr__(self, name: str) -> str | None:
"""Provide dot notation access to configurations and env vars on this class."""
if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS:
return os.getenv(name)
message = f"'{name}' not a valid configuration variable"
raise AttributeError(message)

def check_required_env_vars(self) -> None:
"""Method to raise exception if required env vars not set."""
missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)]
if missing_vars:
message = f"Missing required environment variables: {', '.join(missing_vars)}"
raise OSError(message)

def configure_logger(
self, logger: logging.Logger, stream: StringIO, *, verbose: bool
) -> str:
logging_format_base = "%(asctime)s %(levelname)s %(name)s.%(funcName)s()"
logger.addHandler(logging.StreamHandler(stream))

if verbose:
log_method, log_level = logger.debug, logging.DEBUG
template = logging_format_base + " line %(lineno)d: %(message)s"
for handler in logging.root.handlers:
handler.addFilter(logging.Filter("dsc"))
else:
log_method, log_level = logger.info, logging.INFO
template = logging_format_base + ": %(message)s"

logger.setLevel(log_level)
logging.basicConfig(format=template)
logger.addHandler(logging.StreamHandler(stream))
log_method(f"{logging.getLevelName(logger.getEffectiveLevel())}")

return (
f"Logger '{logger.name}' configured with level="
f"{logging.getLevelName(logger.getEffectiveLevel())}"
)
logger.setLevel(logging.INFO)
return (
f"Logger '{logger.name}' configured with level="
f"{logging.getLevelName(logger.getEffectiveLevel())}"
)


def configure_sentry() -> str:
    """Initialise Sentry from the environment and report what was done.

    Reads WORKSPACE and SENTRY_DSN. Sentry is initialised only when SENTRY_DSN
    is set to something other than the (case-insensitive) string "none".

    Returns:
        A message stating whether exceptions will be sent to Sentry.
    """
    env = os.getenv("WORKSPACE")
    sentry_dsn = os.getenv("SENTRY_DSN")
    dsn_missing = not sentry_dsn or sentry_dsn.lower() == "none"
    if dsn_missing:
        return "No Sentry DSN found, exceptions will not be sent to Sentry"
    sentry_sdk.init(sentry_dsn, environment=env)
    return f"Sentry DSN found, exceptions will be sent to Sentry with env={env}"

def configure_sentry(self) -> str:
    """Initialise Sentry from this object's settings and report what was done.

    Reads the WORKSPACE and SENTRY_DSN attributes. Sentry is initialised only
    when SENTRY_DSN is truthy and not the (case-insensitive) string "none".

    Returns:
        A message stating whether exceptions will be sent to Sentry.
    """
    workspace = self.WORKSPACE
    dsn = self.SENTRY_DSN
    if not dsn:
        return "No Sentry DSN found, exceptions will not be sent to Sentry"
    if dsn.lower() == "none":
        return "No Sentry DSN found, exceptions will not be sent to Sentry"
    sentry_sdk.init(dsn, environment=workspace)
    return f"Sentry DSN found, exceptions will be sent to Sentry with env={workspace}"
80 changes: 80 additions & 0 deletions dsc/s3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

import boto3

if TYPE_CHECKING:
from collections.abc import Iterator

from mypy_boto3_s3.type_defs import PutObjectOutputTypeDef

logger = logging.getLogger(__name__)


class S3Client:
    """A class to perform common S3 operations for this application."""

    def __init__(self) -> None:
        """Create the underlying boto3 S3 client."""
        self.client = boto3.client("s3")

    def archive_file_with_new_key(
        self, bucket: str, key: str, archived_key_prefix: str
    ) -> None:
        """Update the key of the specified file to archive it from processing.

        The object is copied to "<archived_key_prefix>/<key>" in the same bucket
        and the original object is then deleted.

        Args:
            bucket: The S3 bucket containing the files to be archived.
            key: The key of the file to archive.
            archived_key_prefix: The prefix to be applied to the archived file.
        """
        self.client.copy_object(
            Bucket=bucket,
            # The dict form is unambiguous for keys that contain characters
            # such as "?" that have meaning in the "bucket/key" string form.
            CopySource={"Bucket": bucket, "Key": key},
            Key=f"{archived_key_prefix}/{key}",
        )
        self.client.delete_object(
            Bucket=bucket,
            Key=key,
        )

    def put_file(
        self, file_content: str | bytes, bucket: str, key: str
    ) -> PutObjectOutputTypeDef:
        """Put a file in a specified S3 bucket with a specified key.

        Args:
            file_content: The content of the file to be uploaded.
            bucket: The S3 bucket where the file will be uploaded.
            key: The key to be used for the uploaded file.

        Returns:
            The response returned by the S3 `put_object` call.
        """
        response = self.client.put_object(
            Body=file_content,
            Bucket=bucket,
            Key=key,
        )
        logger.debug(f"'{key}' uploaded to S3")
        return response

    def get_files_iter(
        self, bucket: str, file_type: str, excluded_key_prefix: str
    ) -> Iterator[str]:
        """Yield keys of a given file type, skipping those with the excluded prefix.

        Args:
            bucket: The S3 bucket to search.
            file_type: The file type to retrieve; matched against the end of
                each key.
            excluded_key_prefix: Files with this key prefix will not be retrieved.
                NOTE(review): the check matches the value anywhere in the key,
                not only at the start - confirm that is intended.

        Yields:
            Each matching object key, in the order S3 lists them.
        """
        paginator = self.client.get_paginator("list_objects_v2")

        for page in paginator.paginate(Bucket=bucket):
            # A page has no "Contents" entry when it lists no objects
            # (e.g. an empty bucket), so default to an empty list.
            for content in page.get("Contents", []):
                key = content["Key"]
                if key.endswith(file_type) and excluded_key_prefix not in key:
                    yield key
11 changes: 5 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,20 @@ ignore = [
# default
"ANN101",
"ANN102",
"COM812",
"D107",
"N812",
"PTH",

# project-specific
"C90",
"COM812",
"D100",
"D101",
"D102",
"D103",
"D104",
"D107",
"G004",
"N812",
"PLR0912",
"PLR0913",
"PLR0915",
"PTH",
"S320",
"S321",
]
Expand Down
31 changes: 31 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,44 @@
from io import StringIO

import boto3
import pytest
from click.testing import CliRunner
from moto import mock_aws

from dsc.config import Config
from dsc.s3 import S3Client


@pytest.fixture(autouse=True)
def _test_env(monkeypatch):
    """Set the environment variables used by the tests, for every test."""
    env_values = (
        ("SENTRY_DSN", "None"),
        ("WORKSPACE", "test"),
        ("AWS_REGION_NAME", "us-east-1"),
    )
    for env_name, env_value in env_values:
        monkeypatch.setenv(env_name, env_value)


@pytest.fixture
def config_instance() -> Config:
    """Provide a new Config object."""
    config = Config()
    return config


@pytest.fixture
def mocked_s3(config_instance):
    """Yield a moto-mocked S3 client in which the "awd" bucket already exists."""
    with mock_aws():
        region = config_instance.AWS_REGION_NAME
        client = boto3.client("s3", region_name=region)
        client.create_bucket(Bucket="awd")
        yield client


@pytest.fixture
def runner():
    """Provide a click CliRunner."""
    cli_runner = CliRunner()
    return cli_runner


@pytest.fixture
def s3_client():
    """Provide an instance of the application's S3Client."""
    client = S3Client()
    return client


@pytest.fixture
def stream():
    """Provide an empty in-memory text buffer."""
    buffer = StringIO()
    return buffer
38 changes: 23 additions & 15 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,45 @@
import logging

from dsc.config import configure_logger, configure_sentry
import pytest


def test_configure_logger_not_verbose():
def test_check_required_env_vars(monkeypatch, config_instance):
    """Removing a required env var makes check_required_env_vars raise OSError."""
    monkeypatch.delenv("WORKSPACE")
    expected_message = "Missing required environment variables:"
    with pytest.raises(OSError, match=expected_message):
        config_instance.check_required_env_vars()


def test_configure_logger_not_verbose(config_instance, stream):
logger = logging.getLogger(__name__)
result = configure_logger(logger, verbose=False)
info_log_level = 20
assert logger.getEffectiveLevel() == info_log_level
result = config_instance.configure_logger(logger, stream, verbose=False)
assert logger.getEffectiveLevel() == logging.INFO
assert result == "Logger 'tests.test_config' configured with level=INFO"
stream.seek(0)
assert next(stream) == "INFO\n"


def test_configure_logger_verbose():
def test_configure_logger_verbose(config_instance, stream):
logger = logging.getLogger(__name__)
result = configure_logger(logger, verbose=True)
debug_log_level = 10
assert logger.getEffectiveLevel() == debug_log_level
result = config_instance.configure_logger(logger, stream, verbose=True)
assert logger.getEffectiveLevel() == logging.DEBUG
assert result == "Logger 'tests.test_config' configured with level=DEBUG"
stream.seek(0)
assert next(stream) == "DEBUG\n"


def test_configure_sentry_no_env_variable(monkeypatch):
def test_configure_sentry_no_env_variable(monkeypatch, config_instance):
monkeypatch.delenv("SENTRY_DSN", raising=False)
result = configure_sentry()
result = config_instance.configure_sentry()
assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_none(monkeypatch):
def test_configure_sentry_env_variable_is_none(monkeypatch, config_instance):
monkeypatch.setenv("SENTRY_DSN", "None")
result = configure_sentry()
result = config_instance.configure_sentry()
assert result == "No Sentry DSN found, exceptions will not be sent to Sentry"


def test_configure_sentry_env_variable_is_dsn(monkeypatch):
def test_configure_sentry_env_variable_is_dsn(monkeypatch, config_instance):
monkeypatch.setenv("SENTRY_DSN", "https://1234567890@00000.ingest.sentry.io/123456")
result = configure_sentry()
result = config_instance.configure_sentry()
assert result == "Sentry DSN found, exceptions will be sent to Sentry with env=test"
Loading

0 comments on commit 8eb29e2

Please sign in to comment.