From aa057c943873546f4a81bc620e47e90e3227d751 Mon Sep 17 00:00:00 2001
From: Abram Booth
Date: Wed, 25 Jan 2023 12:48:52 -0500
Subject: [PATCH] osf.metrics monthly report plumbing

---
 .../commands/monthly_reporters_go.py          | 97 +++++++++++++++++++
 osf/metrics/reporters/__init__.py             |  5 +
 osf/metrics/reporters/_base.py                | 19 +++-
 osf/metrics/reports.py                        | 20 +++-
 osf/metrics/utils.py                          | 10 ++
 website/settings/defaults.py                  |  6 +
 6 files changed, 153 insertions(+), 4 deletions(-)
 create mode 100644 osf/management/commands/monthly_reporters_go.py

diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py
new file mode 100644
index 00000000000..26d5e06d119
--- /dev/null
+++ b/osf/management/commands/monthly_reporters_go.py
@@ -0,0 +1,97 @@
+from datetime import timedelta
+import logging
+import re
+
+from django.core.management.base import BaseCommand
+from django.utils import timezone
+
+from framework import sentry
+from framework.celery_tasks import app as celery_app
+from osf.metrics.reporters import MONTHLY_REPORTERS
+from osf.metrics.utils import YearMonth
+from website.app import init_app
+
+
+logger = logging.getLogger(__name__)
+
+
+MAXMONTH = 12
+
+
+@celery_app.task(name='management.commands.monthly_reporters_go')
+def monthly_reporters_go(report_year=None, report_month=None):
+    """run every monthly reporter for the given month
+
+    with neither `report_year` nor `report_month`, reports on last month;
+    otherwise both must be given (ValueError if only one is)
+
+    returns a dict of error messages keyed by reporter class name
+    (empty when every reporter succeeded)
+    """
+    init_app()  # OSF-specific setup
+
+    if report_year is None and report_month is None:
+        # default to last month
+        today = timezone.now().date()
+        if today.month == 1:
+            report_yearmonth = YearMonth(
+                year=today.year - 1,
+                month=MAXMONTH,
+            )
+        else:
+            report_yearmonth = YearMonth(
+                year=today.year,
+                month=today.month - 1,
+            )
+    elif report_year and report_month:
+        report_yearmonth = YearMonth(report_year, report_month)
+    else:
+        # explicit raise instead of `assert`, which is stripped under `python -O`
+        raise ValueError('report_year and report_month must be given together')
+
+    errors = {}
+    for reporter_class in MONTHLY_REPORTERS:
+        try:
+            reporter_class().run_and_record_for_month(report_yearmonth)
+        except Exception as e:
+            errors[reporter_class.__name__] = str(e)
+            logger.exception(e)
+            sentry.log_exception()
+            # continue with the next reporter
+    return errors
+
+
+def parse_yearmonth(input_str):
+    """parse a "YYYY-MM" string into a dict with int `year` and `month`
+
+    raises ValueError if `input_str` is not a valid "YYYY-MM"
+    """
+    match = re.fullmatch(r'(?P<year>\d{4})-(?P<month>\d{2})', input_str)
+    if match and (1 <= int(match.group('month')) <= MAXMONTH):
+        return {
+            'year': int(match.group('year')),
+            'month': int(match.group('month')),
+        }
+    else:
+        raise ValueError(f'could not parse yearmonth (expected "YYYY-MM"), got "{input_str}"')
+
+
+class Command(BaseCommand):
+    def add_arguments(self, parser):
+        parser.add_argument(
+            'yearmonth',
+            nargs='?',  # optional: a positional's `default` is only used with nargs '?' or '*'
+            type=parse_yearmonth,
+            default={'year': None, 'month': None},
+            help='year and month (YYYY-MM)',
+        )
+
+    def handle(self, *args, **options):
+        yearmonth = options['yearmonth']
+        errors = monthly_reporters_go(
+            report_year=yearmonth['year'],
+            report_month=yearmonth['month'],
+        )
+        for error_key, error_val in errors.items():
+            self.stdout.write(self.style.ERROR(f'error running {error_key}: ') + error_val)
+        self.stdout.write(self.style.SUCCESS('done.'))
diff --git a/osf/metrics/reporters/__init__.py b/osf/metrics/reporters/__init__.py
index 733ff0f2320..19cd591400f 100644
--- a/osf/metrics/reporters/__init__.py
+++ b/osf/metrics/reporters/__init__.py
@@ -20,3 +20,8 @@
     PreprintCountReporter,
     UserCountReporter,
 )
+
+
+MONTHLY_REPORTERS = (
+    # TODO
+)
diff --git a/osf/metrics/reporters/_base.py b/osf/metrics/reporters/_base.py
index e37a1ee1ac9..a46650942c9 100644
--- a/osf/metrics/reporters/_base.py
+++ b/osf/metrics/reporters/_base.py
@@ -3,13 +3,30 @@
 import logging
 import pytz
 
-from website.settings import KEEN as keen_settings
 from keen.client import KeenClient
 
+from osf.metrics.utils import YearMonth
+from website.settings import KEEN as keen_settings
+
 
 logger = logging.getLogger(__name__)
 
 
+class MonthlyReporter:
+    def report(self, report_yearmonth: YearMonth):
+        """build a report for the given month
+        """
+        # `__name__` lives on the class, not the instance
+        raise NotImplementedError(f'{type(self).__name__} must implement `report`')
+
+    def run_and_record_for_month(self, report_yearmonth: YearMonth):
+        """build the report for the given month and save it
+        """
+        report = self.report(report_yearmonth)
+        report.report_yearmonth = str(report_yearmonth)
+        report.save()
+
+
 class DailyReporter:
     def report(self, report_date):
         """build reports for the given date
diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py
index aacfc5c009a..95639a2ab4e 100644
--- a/osf/metrics/reports.py
+++ b/osf/metrics/reports.py
@@ -1,9 +1,9 @@
 from django.dispatch import receiver
 from elasticsearch_dsl import InnerDoc
 from elasticsearch_metrics import metrics
-from elasticsearch_metrics.signals import pre_save
+from elasticsearch_metrics.signals import pre_save as metrics_pre_save
 
-from osf.metrics.utils import stable_key
+from osf.metrics.utils import stable_key, YearMonth
 
 
 class ReportInvalid(Exception):
@@ -28,7 +28,19 @@ class Meta:
         source = metrics.MetaField(enabled=True)
 
 
-@receiver(pre_save)
+class MonthlyReport(metrics.Metric):
+    """MonthlyReport (abstract base for report-based metrics that run monthly)
+    """
+
+    report_yearmonth = metrics.Date(format='strict_year_month', required=True)
+
+    class Meta:
+        abstract = True
+        dynamic = metrics.MetaField('strict')
+        source = metrics.MetaField(enabled=True)
+
+
+@receiver(metrics_pre_save)
 def set_report_id(sender, instance, **kwargs):
     # Set the document id to a hash of "unique together"
     # values (just `report_date` by default) to get
@@ -44,6 +56,8 @@ def set_report_id(sender, instance, **kwargs):
             if not duf_value or not isinstance(duf_value, str):
                 raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})')
             instance.meta.id = stable_key(instance.report_date, duf_value)
+    elif issubclass(sender, MonthlyReport):
+        instance.meta.id = stable_key(instance.report_yearmonth)
 
 
 #### BEGIN reusable inner objects #####
diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py
index 75628ae00ec..67c76a44676 100644
--- a/osf/metrics/utils.py
+++ b/osf/metrics/utils.py
@@ -1,3 +1,4 @@
+import typing
 from hashlib import sha256
 
 
@@ -13,3 +14,12 @@ def stable_key(*key_parts):
 
     plain_key = '|'.join(map(str, key_parts))
     return sha256(bytes(plain_key, encoding='utf')).hexdigest()
+
+
+class YearMonth(typing.NamedTuple):
+    year: int
+    month: int
+
+    def __str__(self):
+        # zero-pad the month ("2023-01") to match the `strict_year_month` date format
+        return f'{self.year}-{self.month:0>2}'
diff --git a/website/settings/defaults.py b/website/settings/defaults.py
index 1fa3882e0b7..1b522160bcc 100644
--- a/website/settings/defaults.py
+++ b/website/settings/defaults.py
@@ -430,6 +430,7 @@ class CeleryConfig:
         'osf.management.commands.populate_branched_from',
         'osf.management.commands.cumulative_plos_metrics',
         'osf.management.commands.daily_reporters_go',
+        'osf.management.commands.monthly_reporters_go',
     }
 
     med_pri_modules = {
@@ -527,6 +528,7 @@ class CeleryConfig:
         'osf.management.commands.cumulative_plos_metrics',
         'api.providers.tasks',
         'osf.management.commands.daily_reporters_go',
+        'osf.management.commands.monthly_reporters_go',
     )
 
     # Modules that need metrics and release requirements
@@ -625,6 +627,10 @@ class CeleryConfig:
                 'schedule': crontab(minute=0, hour=6),  # Daily 1:00 a.m.
                 'kwargs': {'also_send_to_keen': True},
             },
+            'monthly_reporters_go': {
+                'task': 'management.commands.monthly_reporters_go',
+                'schedule': crontab(minute=30, hour=6, day_of_month=2),  # Second day of month 1:30 a.m.
+            },
             # 'data_storage_usage': {
             #   'task': 'management.commands.data_storage_usage',
             #   'schedule': crontab(day_of_month=1, minute=30, hour=4),  # Last of the month at 11:30 p.m.