Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

osf.metrics monthly report plumbing #10312

Draft
wants to merge 1 commit into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions osf/management/commands/monthly_reporters_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from datetime import timedelta
import logging
import re

from django.core.management.base import BaseCommand
from django.utils import timezone

from framework import sentry
from framework.celery_tasks import app as celery_app
from osf.metrics.reporters import MONTHLY_REPORTERS
from osf.metrics.utils import YearMonth
from website.app import init_app


logger = logging.getLogger(__name__)


# Number of the last month of the year (December); used to roll back from
# January to the previous year when defaulting to "last month".
MAXMONTH = 12


@celery_app.task(name='management.commands.monthly_reporters_go')
def monthly_reporters_go(report_year=None, report_month=None):
    """Run every reporter in MONTHLY_REPORTERS for a single month.

    With no arguments, reports on the month before the current one
    (according to `timezone.now()`).

    :param report_year: year to report on; must be given with `report_month`
    :param report_month: month to report on; must be given with `report_year`
    :returns: dict mapping the class name of each reporter that raised to the
        string form of its exception (empty when every reporter succeeded)
    :raises ValueError: if `report_year` and `report_month` are not both
        given (or both omitted)
    """
    init_app()  # OSF-specific setup

    if report_year is None and report_month is None:
        # default to last month
        today = timezone.now().date()
        if today.month == 1:
            # January rolls back to December of the previous year
            report_yearmonth = YearMonth(
                year=today.year - 1,
                month=MAXMONTH,
            )
        else:
            report_yearmonth = YearMonth(
                year=today.year,
                month=today.month - 1,
            )
    elif report_year and report_month:
        report_yearmonth = YearMonth(report_year, report_month)
    else:
        # explicit check instead of `assert`, which is stripped under `python -O`
        raise ValueError(
            'report_year and report_month must be given together '
            f'(got report_year={report_year!r}, report_month={report_month!r})'
        )

    errors = {}
    for reporter_class in MONTHLY_REPORTERS:
        try:
            reporter_class().run_and_record_for_month(report_yearmonth)
        except Exception as e:
            # record and report the failure, then continue with the next
            # reporter so one broken reporter does not block the others
            errors[reporter_class.__name__] = str(e)
            logger.exception(e)
            sentry.log_exception()
    return errors


def parse_yearmonth(input_str):
    """Parse a "YYYY-MM" string into its numeric year and month.

    :param input_str: text such as "2022-03"
    :returns: dict with int values under the keys 'year' and 'month'
    :raises ValueError: if `input_str` is not exactly four digits, a hyphen,
        and two digits, or if the month is not between 01 and 12
    """
    match = re.fullmatch(r'(?P<year>\d{4})-(?P<month>\d{2})', input_str)
    if not match:
        raise ValueError(f'could not parse yearmonth (expected "YYYY-MM"), got "{input_str}"')
    year = int(match.group('year'))
    month = int(match.group('month'))
    # the pattern alone would let "00" and "13".."99" through
    if not 1 <= month <= 12:
        raise ValueError(f'invalid month in yearmonth (expected 01 through 12), got "{input_str}"')
    return {'year': year, 'month': month}


class Command(BaseCommand):
    """Management command that runs the monthly reporters for one month.

    Takes an optional positional "YYYY-MM" argument; when it is omitted,
    `monthly_reporters_go` is called with no year or month and picks its
    own default.
    """

    def add_arguments(self, parser):
        """Register the optional `yearmonth` positional argument."""
        parser.add_argument(
            'yearmonth',
            # `nargs='?'` makes the positional optional; without it argparse
            # requires a value and `default` is never used
            nargs='?',
            type=parse_yearmonth,
            default={'year': None, 'month': None},
            help='year and month (YYYY-MM)',
        )

    def handle(self, *args, **options):
        """Run the reporters and print one line per reporter that failed."""
        yearmonth = options.get('yearmonth') or {}
        errors = monthly_reporters_go(
            report_year=yearmonth.get('year'),
            report_month=yearmonth.get('month'),
        )
        # `errors` is a dict of reporter class name -> error message
        for error_key, error_val in errors.items():
            self.stdout.write(self.style.ERROR(f'error running {error_key}: ') + error_val)
        self.stdout.write(self.style.SUCCESS('done.'))
5 changes: 5 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,8 @@
PreprintCountReporter,
UserCountReporter,
)


# Reporter classes to run for each monthly report; each is instantiated with
# no arguments and must provide `run_and_record_for_month`.
MONTHLY_REPORTERS = (
    # TODO
)
16 changes: 15 additions & 1 deletion osf/metrics/reporters/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,27 @@
import logging
import pytz

from website.settings import KEEN as keen_settings
from keen.client import KeenClient

from osf.metrics.utils import YearMonth
from website.settings import KEEN as keen_settings


logger = logging.getLogger(__name__)


class MonthlyReporter:
    """Base class for reporters that build one report per month.

    Subclasses implement `report`; `run_and_record_for_month` builds the
    report, stamps it with the month, and saves it.
    """

    def report(self, report_yearmonth: 'YearMonth'):
        """build a report for the given month

        Must be overridden; the returned object is given a
        `report_yearmonth` attribute and then has `save()` called on it.
        """
        # `self` is an instance and has no `__name__`; name its class instead
        raise NotImplementedError(f'{type(self).__name__} must implement `report`')

    def run_and_record_for_month(self, report_yearmonth: 'YearMonth'):
        """build the report for the given month, label it, and save it
        """
        report = self.report(report_yearmonth)
        report.report_yearmonth = str(report_yearmonth)
        report.save()


class DailyReporter:
def report(self, report_date):
"""build reports for the given date
Expand Down
20 changes: 17 additions & 3 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from django.dispatch import receiver
from elasticsearch_dsl import InnerDoc
from elasticsearch_metrics import metrics
from elasticsearch_metrics.signals import pre_save
from elasticsearch_metrics.signals import pre_save as metrics_pre_save

from osf.metrics.utils import stable_key
from osf.metrics.utils import stable_key, YearMonth


class ReportInvalid(Exception):
Expand All @@ -28,7 +28,19 @@ class Meta:
source = metrics.MetaField(enabled=True)


@receiver(pre_save)
class MonthlyReport(metrics.Metric):
    """Abstract base for report-based metrics that run monthly.

    Concrete monthly reports subclass this and add their own fields.
    """

    # the month the report is for, as a year-month date
    report_yearmonth = metrics.Date(
        required=True,
        format='strict_year_month',
    )

    class Meta:
        source = metrics.MetaField(enabled=True)
        dynamic = metrics.MetaField('strict')
        abstract = True


@receiver(metrics_pre_save)
def set_report_id(sender, instance, **kwargs):
# Set the document id to a hash of "unique together"
# values (just `report_date` by default) to get
Expand All @@ -44,6 +56,8 @@ def set_report_id(sender, instance, **kwargs):
if not duf_value or not isinstance(duf_value, str):
raise ReportInvalid(f'{sender.__name__}.{duf_name} MUST have a non-empty string value (got {duf_value})')
instance.meta.id = stable_key(instance.report_date, duf_value)
elif issubclass(sender, MonthlyReport):
instance.meta.id = stable_key(instance.report_yearmonth)


#### BEGIN reusable inner objects #####
Expand Down
9 changes: 9 additions & 0 deletions osf/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import typing
from hashlib import sha256


Expand All @@ -13,3 +14,11 @@ def stable_key(*key_parts):

plain_key = '|'.join(map(str, key_parts))
return sha256(bytes(plain_key, encoding='utf')).hexdigest()


class YearMonth(typing.NamedTuple):
    """A calendar month, identified by its year and month number."""
    year: int
    month: int

    def __str__(self):
        """Return the month as zero-padded "YYYY-MM" (e.g. "2022-03")."""
        # pad so single-digit months give "2022-03" rather than "2022-3"
        return f'{self.year:04d}-{self.month:02d}'
6 changes: 6 additions & 0 deletions website/settings/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,7 @@ class CeleryConfig:
'osf.management.commands.populate_branched_from',
'osf.management.commands.cumulative_plos_metrics',
'osf.management.commands.daily_reporters_go',
'osf.management.commands.monthly_reporters_go',
}

med_pri_modules = {
Expand Down Expand Up @@ -527,6 +528,7 @@ class CeleryConfig:
'osf.management.commands.cumulative_plos_metrics',
'api.providers.tasks',
'osf.management.commands.daily_reporters_go',
'osf.management.commands.monthly_reporters_go',
)

# Modules that need metrics and release requirements
Expand Down Expand Up @@ -625,6 +627,10 @@ class CeleryConfig:
'schedule': crontab(minute=0, hour=6), # Daily 1:00 a.m.
'kwargs': {'also_send_to_keen': True},
},
'monthly_reporters_go': {
'task': 'management.commands.monthly_reporters_go',
'schedule': crontab(minute=30, hour=6, day_of_month=2), # Second day of month 1:30 a.m.
},
# 'data_storage_usage': {
# 'task': 'management.commands.data_storage_usage',
# 'schedule': crontab(day_of_month=1, minute=30, hour=4), # Last of the month at 11:30 p.m.
Expand Down